aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-11-12 11:52:47 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-11-12 11:52:47 -0500
commit8a9f772c14f85e2a580baadc50c194835da2d4e5 (patch)
tree4ac04e465fa8295944f997fb517dc9904bb8e4f3
parent25a34554d600b799cbf5159bef372b02d3b4e1c6 (diff)
parentcedb4a7d9f6aedb0dce94d6285b69dcb3c10fa05 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits) block: remove unused copy_io_context() Documentation: remove anticipatory scheduler info block: remove REQ_HARDBARRIER ioprio: rcu_read_lock/unlock protect find_task_by_vpid call (V2) ioprio: fix RCU locking around task dereference block: ioctl: fix information leak to userland block: read i_size with i_size_read() cciss: fix proc warning on attempt to remove non-existant directory bio: take care not overflow page count when mapping/copying user data block: limit vec count in bio_kmalloc() and bio_alloc_map_data() block: take care not to overflow when calculating total iov length block: check for proper length of iov entries in blk_rq_map_user_iov() cciss: remove controllers supported by hpsa cciss: use usleep_range not msleep for small sleeps cciss: limit commands allocated on reset_devices cciss: Use kernel provided PCI state save and restore functions cciss: fix board status waiting code drbd: Removed checks for REQ_HARDBARRIER on incomming BIOs drbd: REQ_HARDBARRIER -> REQ_FUA transition for meta data accesses drbd: Removed the BIO_RW_BARRIER support form the receiver/epoch code ...
-rw-r--r--Documentation/block/switching-sched.txt8
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--Documentation/rbtree.txt4
-rw-r--r--block/blk-core.c11
-rw-r--r--block/blk-ioc.c14
-rw-r--r--block/blk-map.c2
-rw-r--r--block/compat_ioctl.c4
-rw-r--r--block/elevator.c4
-rw-r--r--block/ioctl.c7
-rw-r--r--block/scsi_ioctl.c34
-rw-r--r--drivers/block/aoe/aoeblk.c3
-rw-r--r--drivers/block/cciss.c131
-rw-r--r--drivers/block/cciss.h4
-rw-r--r--drivers/block/drbd/drbd_actlog.c42
-rw-r--r--drivers/block/drbd/drbd_int.h52
-rw-r--r--drivers/block/drbd/drbd_main.c148
-rw-r--r--drivers/block/drbd/drbd_nl.c25
-rw-r--r--drivers/block/drbd/drbd_proc.c1
-rw-r--r--drivers/block/drbd/drbd_receiver.c217
-rw-r--r--drivers/block/drbd/drbd_req.c38
-rw-r--r--drivers/block/drbd/drbd_worker.c23
-rw-r--r--drivers/block/loop.c6
-rw-r--r--drivers/block/xen-blkfront.c2
-rw-r--r--drivers/md/md.c20
-rw-r--r--drivers/scsi/scsi_error.c18
-rw-r--r--drivers/usb/storage/uas.c5
-rw-r--r--fs/bio.c23
-rw-r--r--fs/ioprio.c18
-rw-r--r--include/linux/bio.h4
-rw-r--r--include/linux/blk_types.h6
-rw-r--r--include/linux/blkdev.h3
-rw-r--r--include/linux/drbd.h2
-rw-r--r--include/linux/iocontext.h1
-rw-r--r--kernel/trace/blktrace.c4
34 files changed, 367 insertions, 519 deletions
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
index d5af3f630814..71cfbdc0f74d 100644
--- a/Documentation/block/switching-sched.txt
+++ b/Documentation/block/switching-sched.txt
@@ -16,7 +16,7 @@ you can do so by typing:
16As of the Linux 2.6.10 kernel, it is now possible to change the 16As of the Linux 2.6.10 kernel, it is now possible to change the
17IO scheduler for a given block device on the fly (thus making it possible, 17IO scheduler for a given block device on the fly (thus making it possible,
18for instance, to set the CFQ scheduler for the system default, but 18for instance, to set the CFQ scheduler for the system default, but
19set a specific device to use the anticipatory or noop schedulers - which 19set a specific device to use the deadline or noop schedulers - which
20can improve that device's throughput). 20can improve that device's throughput).
21 21
22To set a specific scheduler, simply do this: 22To set a specific scheduler, simply do this:
@@ -31,7 +31,7 @@ a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
31will be displayed, with the currently selected scheduler in brackets: 31will be displayed, with the currently selected scheduler in brackets:
32 32
33# cat /sys/block/hda/queue/scheduler 33# cat /sys/block/hda/queue/scheduler
34noop anticipatory deadline [cfq] 34noop deadline [cfq]
35# echo anticipatory > /sys/block/hda/queue/scheduler 35# echo deadline > /sys/block/hda/queue/scheduler
36# cat /sys/block/hda/queue/scheduler 36# cat /sys/block/hda/queue/scheduler
37noop [anticipatory] deadline cfq 37noop [deadline] cfq
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ed45e9802aa8..92e83e53148f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -706,7 +706,7 @@ and is between 256 and 4096 characters. It is defined in the file
706 arch/x86/kernel/cpu/cpufreq/elanfreq.c. 706 arch/x86/kernel/cpu/cpufreq/elanfreq.c.
707 707
708 elevator= [IOSCHED] 708 elevator= [IOSCHED]
709 Format: {"anticipatory" | "cfq" | "deadline" | "noop"} 709 Format: {"cfq" | "deadline" | "noop"}
710 See Documentation/block/as-iosched.txt and 710 See Documentation/block/as-iosched.txt and
711 Documentation/block/deadline-iosched.txt for details. 711 Documentation/block/deadline-iosched.txt for details.
712 712
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 221f38be98f4..19f8278c3854 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -21,8 +21,8 @@ three rotations, respectively, to balance the tree), with slightly slower
21To quote Linux Weekly News: 21To quote Linux Weekly News:
22 22
23 There are a number of red-black trees in use in the kernel. 23 There are a number of red-black trees in use in the kernel.
24 The anticipatory, deadline, and CFQ I/O schedulers all employ 24 The deadline and CFQ I/O schedulers employ rbtrees to
25 rbtrees to track requests; the packet CD/DVD driver does the same. 25 track requests; the packet CD/DVD driver does the same.
26 The high-resolution timer code uses an rbtree to organize outstanding 26 The high-resolution timer code uses an rbtree to organize outstanding
27 timer requests. The ext3 filesystem tracks directory entries in a 27 timer requests. The ext3 filesystem tracks directory entries in a
28 red-black tree. Virtual memory areas (VMAs) are tracked with red-black 28 red-black tree. Virtual memory areas (VMAs) are tracked with red-black
diff --git a/block/blk-core.c b/block/blk-core.c
index f0834e2f5727..4ce953f1b390 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1194,13 +1194,6 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1194 int where = ELEVATOR_INSERT_SORT; 1194 int where = ELEVATOR_INSERT_SORT;
1195 int rw_flags; 1195 int rw_flags;
1196 1196
1197 /* REQ_HARDBARRIER is no more */
1198 if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
1199 "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
1200 bio_endio(bio, -EOPNOTSUPP);
1201 return 0;
1202 }
1203
1204 /* 1197 /*
1205 * low level driver can indicate that it wants pages above a 1198 * low level driver can indicate that it wants pages above a
1206 * certain limit bounced to low memory (ie for highmem, or even 1199 * certain limit bounced to low memory (ie for highmem, or even
@@ -1351,7 +1344,7 @@ static void handle_bad_sector(struct bio *bio)
1351 bdevname(bio->bi_bdev, b), 1344 bdevname(bio->bi_bdev, b),
1352 bio->bi_rw, 1345 bio->bi_rw,
1353 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1346 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1354 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 1347 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1355 1348
1356 set_bit(BIO_EOF, &bio->bi_flags); 1349 set_bit(BIO_EOF, &bio->bi_flags);
1357} 1350}
@@ -1404,7 +1397,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1404 return 0; 1397 return 0;
1405 1398
1406 /* Test device or partition size, when known. */ 1399 /* Test device or partition size, when known. */
1407 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 1400 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1408 if (maxsector) { 1401 if (maxsector) {
1409 sector_t sector = bio->bi_sector; 1402 sector_t sector = bio->bi_sector;
1410 1403
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index d22c4c55c406..3c7a339fe381 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -153,20 +153,6 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node)
153} 153}
154EXPORT_SYMBOL(get_io_context); 154EXPORT_SYMBOL(get_io_context);
155 155
156void copy_io_context(struct io_context **pdst, struct io_context **psrc)
157{
158 struct io_context *src = *psrc;
159 struct io_context *dst = *pdst;
160
161 if (src) {
162 BUG_ON(atomic_long_read(&src->refcount) == 0);
163 atomic_long_inc(&src->refcount);
164 put_io_context(dst);
165 *pdst = src;
166 }
167}
168EXPORT_SYMBOL(copy_io_context);
169
170static int __init blk_ioc_init(void) 156static int __init blk_ioc_init(void)
171{ 157{
172 iocontext_cachep = kmem_cache_create("blkdev_ioc", 158 iocontext_cachep = kmem_cache_create("blkdev_ioc",
diff --git a/block/blk-map.c b/block/blk-map.c
index d4a586d8691e..5d5dbe47c228 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -205,6 +205,8 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
205 unaligned = 1; 205 unaligned = 1;
206 break; 206 break;
207 } 207 }
208 if (!iov[i].iov_len)
209 return -EINVAL;
208 } 210 }
209 211
210 if (unaligned || (q->dma_pad_mask & len) || map_data) 212 if (unaligned || (q->dma_pad_mask & len) || map_data)
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 119f07b74dc0..58c6ee5b010c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -744,13 +744,13 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
744 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; 744 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
745 return 0; 745 return 0;
746 case BLKGETSIZE: 746 case BLKGETSIZE:
747 size = bdev->bd_inode->i_size; 747 size = i_size_read(bdev->bd_inode);
748 if ((size >> 9) > ~0UL) 748 if ((size >> 9) > ~0UL)
749 return -EFBIG; 749 return -EFBIG;
750 return compat_put_ulong(arg, size >> 9); 750 return compat_put_ulong(arg, size >> 9);
751 751
752 case BLKGETSIZE64_32: 752 case BLKGETSIZE64_32:
753 return compat_put_u64(arg, bdev->bd_inode->i_size); 753 return compat_put_u64(arg, i_size_read(bdev->bd_inode));
754 754
755 case BLKTRACESETUP32: 755 case BLKTRACESETUP32:
756 case BLKTRACESTART: /* compatible */ 756 case BLKTRACESTART: /* compatible */
diff --git a/block/elevator.c b/block/elevator.c
index 282e8308f7e2..2569512830d3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -429,7 +429,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
429 q->nr_sorted--; 429 q->nr_sorted--;
430 430
431 boundary = q->end_sector; 431 boundary = q->end_sector;
432 stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED; 432 stop_flags = REQ_SOFTBARRIER | REQ_STARTED;
433 list_for_each_prev(entry, &q->queue_head) { 433 list_for_each_prev(entry, &q->queue_head) {
434 struct request *pos = list_entry_rq(entry); 434 struct request *pos = list_entry_rq(entry);
435 435
@@ -691,7 +691,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
691void __elv_add_request(struct request_queue *q, struct request *rq, int where, 691void __elv_add_request(struct request_queue *q, struct request *rq, int where,
692 int plug) 692 int plug)
693{ 693{
694 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 694 if (rq->cmd_flags & REQ_SOFTBARRIER) {
695 /* barriers are scheduling boundary, update end_sector */ 695 /* barriers are scheduling boundary, update end_sector */
696 if (rq->cmd_type == REQ_TYPE_FS || 696 if (rq->cmd_type == REQ_TYPE_FS ||
697 (rq->cmd_flags & REQ_DISCARD)) { 697 (rq->cmd_flags & REQ_DISCARD)) {
diff --git a/block/ioctl.c b/block/ioctl.c
index d724ceb1d465..3d866d0037f2 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -125,7 +125,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
125 start >>= 9; 125 start >>= 9;
126 len >>= 9; 126 len >>= 9;
127 127
128 if (start + len > (bdev->bd_inode->i_size >> 9)) 128 if (start + len > (i_size_read(bdev->bd_inode) >> 9))
129 return -EINVAL; 129 return -EINVAL;
130 if (secure) 130 if (secure)
131 flags |= BLKDEV_DISCARD_SECURE; 131 flags |= BLKDEV_DISCARD_SECURE;
@@ -242,6 +242,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
242 * We need to set the startsect first, the driver may 242 * We need to set the startsect first, the driver may
243 * want to override it. 243 * want to override it.
244 */ 244 */
245 memset(&geo, 0, sizeof(geo));
245 geo.start = get_start_sect(bdev); 246 geo.start = get_start_sect(bdev);
246 ret = disk->fops->getgeo(bdev, &geo); 247 ret = disk->fops->getgeo(bdev, &geo);
247 if (ret) 248 if (ret)
@@ -307,12 +308,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
307 ret = blkdev_reread_part(bdev); 308 ret = blkdev_reread_part(bdev);
308 break; 309 break;
309 case BLKGETSIZE: 310 case BLKGETSIZE:
310 size = bdev->bd_inode->i_size; 311 size = i_size_read(bdev->bd_inode);
311 if ((size >> 9) > ~0UL) 312 if ((size >> 9) > ~0UL)
312 return -EFBIG; 313 return -EFBIG;
313 return put_ulong(arg, size >> 9); 314 return put_ulong(arg, size >> 9);
314 case BLKGETSIZE64: 315 case BLKGETSIZE64:
315 return put_u64(arg, bdev->bd_inode->i_size); 316 return put_u64(arg, i_size_read(bdev->bd_inode));
316 case BLKTRACESTART: 317 case BLKTRACESTART:
317 case BLKTRACESTOP: 318 case BLKTRACESTOP:
318 case BLKTRACESETUP: 319 case BLKTRACESETUP:
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a8b5a10eb5b0..4f4230b79bb6 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -321,33 +321,47 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
321 if (hdr->iovec_count) { 321 if (hdr->iovec_count) {
322 const int size = sizeof(struct sg_iovec) * hdr->iovec_count; 322 const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
323 size_t iov_data_len; 323 size_t iov_data_len;
324 struct sg_iovec *iov; 324 struct sg_iovec *sg_iov;
325 struct iovec *iov;
326 int i;
325 327
326 iov = kmalloc(size, GFP_KERNEL); 328 sg_iov = kmalloc(size, GFP_KERNEL);
327 if (!iov) { 329 if (!sg_iov) {
328 ret = -ENOMEM; 330 ret = -ENOMEM;
329 goto out; 331 goto out;
330 } 332 }
331 333
332 if (copy_from_user(iov, hdr->dxferp, size)) { 334 if (copy_from_user(sg_iov, hdr->dxferp, size)) {
333 kfree(iov); 335 kfree(sg_iov);
334 ret = -EFAULT; 336 ret = -EFAULT;
335 goto out; 337 goto out;
336 } 338 }
337 339
340 /*
341 * Sum up the vecs, making sure they don't overflow
342 */
343 iov = (struct iovec *) sg_iov;
344 iov_data_len = 0;
345 for (i = 0; i < hdr->iovec_count; i++) {
346 if (iov_data_len + iov[i].iov_len < iov_data_len) {
347 kfree(sg_iov);
348 ret = -EINVAL;
349 goto out;
350 }
351 iov_data_len += iov[i].iov_len;
352 }
353
338 /* SG_IO howto says that the shorter of the two wins */ 354 /* SG_IO howto says that the shorter of the two wins */
339 iov_data_len = iov_length((struct iovec *)iov,
340 hdr->iovec_count);
341 if (hdr->dxfer_len < iov_data_len) { 355 if (hdr->dxfer_len < iov_data_len) {
342 hdr->iovec_count = iov_shorten((struct iovec *)iov, 356 hdr->iovec_count = iov_shorten(iov,
343 hdr->iovec_count, 357 hdr->iovec_count,
344 hdr->dxfer_len); 358 hdr->dxfer_len);
345 iov_data_len = hdr->dxfer_len; 359 iov_data_len = hdr->dxfer_len;
346 } 360 }
347 361
348 ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, 362 ret = blk_rq_map_user_iov(q, rq, NULL, sg_iov, hdr->iovec_count,
349 iov_data_len, GFP_KERNEL); 363 iov_data_len, GFP_KERNEL);
350 kfree(iov); 364 kfree(sg_iov);
351 } else if (hdr->dxfer_len) 365 } else if (hdr->dxfer_len)
352 ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, 366 ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
353 GFP_KERNEL); 367 GFP_KERNEL);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 541e18879965..528f6318ded1 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -180,9 +180,6 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
180 BUG(); 180 BUG();
181 bio_endio(bio, -ENXIO); 181 bio_endio(bio, -ENXIO);
182 return 0; 182 return 0;
183 } else if (bio->bi_rw & REQ_HARDBARRIER) {
184 bio_endio(bio, -EOPNOTSUPP);
185 return 0;
186 } else if (bio->bi_io_vec == NULL) { 183 } else if (bio->bi_io_vec == NULL) {
187 printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); 184 printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
188 BUG(); 185 BUG();
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 2cc4dda46279..a67d0a611a8a 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -113,6 +113,8 @@ static struct board_type products[] = {
113 {0x409D0E11, "Smart Array 6400 EM", &SA5_access}, 113 {0x409D0E11, "Smart Array 6400 EM", &SA5_access},
114 {0x40910E11, "Smart Array 6i", &SA5_access}, 114 {0x40910E11, "Smart Array 6i", &SA5_access},
115 {0x3225103C, "Smart Array P600", &SA5_access}, 115 {0x3225103C, "Smart Array P600", &SA5_access},
116 {0x3223103C, "Smart Array P800", &SA5_access},
117 {0x3234103C, "Smart Array P400", &SA5_access},
116 {0x3235103C, "Smart Array P400i", &SA5_access}, 118 {0x3235103C, "Smart Array P400i", &SA5_access},
117 {0x3211103C, "Smart Array E200i", &SA5_access}, 119 {0x3211103C, "Smart Array E200i", &SA5_access},
118 {0x3212103C, "Smart Array E200", &SA5_access}, 120 {0x3212103C, "Smart Array E200", &SA5_access},
@@ -3753,7 +3755,7 @@ static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
3753 for (i = 0; i < MAX_CONFIG_WAIT; i++) { 3755 for (i = 0; i < MAX_CONFIG_WAIT; i++) {
3754 if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq)) 3756 if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
3755 break; 3757 break;
3756 msleep(10); 3758 usleep_range(10000, 20000);
3757 } 3759 }
3758} 3760}
3759 3761
@@ -3937,10 +3939,9 @@ static int __devinit cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
3937 *board_id = ((subsystem_device_id << 16) & 0xffff0000) | 3939 *board_id = ((subsystem_device_id << 16) & 0xffff0000) |
3938 subsystem_vendor_id; 3940 subsystem_vendor_id;
3939 3941
3940 for (i = 0; i < ARRAY_SIZE(products); i++) { 3942 for (i = 0; i < ARRAY_SIZE(products); i++)
3941 if (*board_id == products[i].board_id) 3943 if (*board_id == products[i].board_id)
3942 return i; 3944 return i;
3943 }
3944 dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n", 3945 dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n",
3945 *board_id); 3946 *board_id);
3946 return -ENODEV; 3947 return -ENODEV;
@@ -3971,18 +3972,31 @@ static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
3971 return -ENODEV; 3972 return -ENODEV;
3972} 3973}
3973 3974
3974static int __devinit cciss_wait_for_board_ready(ctlr_info_t *h) 3975static int __devinit cciss_wait_for_board_state(struct pci_dev *pdev,
3976 void __iomem *vaddr, int wait_for_ready)
3977#define BOARD_READY 1
3978#define BOARD_NOT_READY 0
3975{ 3979{
3976 int i; 3980 int i, iterations;
3977 u32 scratchpad; 3981 u32 scratchpad;
3978 3982
3979 for (i = 0; i < CCISS_BOARD_READY_ITERATIONS; i++) { 3983 if (wait_for_ready)
3980 scratchpad = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET); 3984 iterations = CCISS_BOARD_READY_ITERATIONS;
3981 if (scratchpad == CCISS_FIRMWARE_READY) 3985 else
3982 return 0; 3986 iterations = CCISS_BOARD_NOT_READY_ITERATIONS;
3987
3988 for (i = 0; i < iterations; i++) {
3989 scratchpad = readl(vaddr + SA5_SCRATCHPAD_OFFSET);
3990 if (wait_for_ready) {
3991 if (scratchpad == CCISS_FIRMWARE_READY)
3992 return 0;
3993 } else {
3994 if (scratchpad != CCISS_FIRMWARE_READY)
3995 return 0;
3996 }
3983 msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS); 3997 msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS);
3984 } 3998 }
3985 dev_warn(&h->pdev->dev, "board not ready, timed out.\n"); 3999 dev_warn(&pdev->dev, "board not ready, timed out.\n");
3986 return -ENODEV; 4000 return -ENODEV;
3987} 4001}
3988 4002
@@ -4031,6 +4045,11 @@ static int __devinit cciss_find_cfgtables(ctlr_info_t *h)
4031static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h) 4045static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
4032{ 4046{
4033 h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands)); 4047 h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands));
4048
4049 /* Limit commands in memory limited kdump scenario. */
4050 if (reset_devices && h->max_commands > 32)
4051 h->max_commands = 32;
4052
4034 if (h->max_commands < 16) { 4053 if (h->max_commands < 16) {
4035 dev_warn(&h->pdev->dev, "Controller reports " 4054 dev_warn(&h->pdev->dev, "Controller reports "
4036 "max supported commands of %d, an obvious lie. " 4055 "max supported commands of %d, an obvious lie. "
@@ -4148,7 +4167,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *h)
4148 err = -ENOMEM; 4167 err = -ENOMEM;
4149 goto err_out_free_res; 4168 goto err_out_free_res;
4150 } 4169 }
4151 err = cciss_wait_for_board_ready(h); 4170 err = cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY);
4152 if (err) 4171 if (err)
4153 goto err_out_free_res; 4172 goto err_out_free_res;
4154 err = cciss_find_cfgtables(h); 4173 err = cciss_find_cfgtables(h);
@@ -4313,36 +4332,6 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
4313#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) 4332#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
4314#define cciss_noop(p) cciss_message(p, 3, 0) 4333#define cciss_noop(p) cciss_message(p, 3, 0)
4315 4334
4316static __devinit int cciss_reset_msi(struct pci_dev *pdev)
4317{
4318/* the #defines are stolen from drivers/pci/msi.h. */
4319#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
4320#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
4321
4322 int pos;
4323 u16 control = 0;
4324
4325 pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
4326 if (pos) {
4327 pci_read_config_word(pdev, msi_control_reg(pos), &control);
4328 if (control & PCI_MSI_FLAGS_ENABLE) {
4329 dev_info(&pdev->dev, "resetting MSI\n");
4330 pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
4331 }
4332 }
4333
4334 pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
4335 if (pos) {
4336 pci_read_config_word(pdev, msi_control_reg(pos), &control);
4337 if (control & PCI_MSIX_FLAGS_ENABLE) {
4338 dev_info(&pdev->dev, "resetting MSI-X\n");
4339 pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
4340 }
4341 }
4342
4343 return 0;
4344}
4345
4346static int cciss_controller_hard_reset(struct pci_dev *pdev, 4335static int cciss_controller_hard_reset(struct pci_dev *pdev,
4347 void * __iomem vaddr, bool use_doorbell) 4336 void * __iomem vaddr, bool use_doorbell)
4348{ 4337{
@@ -4397,17 +4386,17 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
4397 * states or using the doorbell register. */ 4386 * states or using the doorbell register. */
4398static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) 4387static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
4399{ 4388{
4400 u16 saved_config_space[32];
4401 u64 cfg_offset; 4389 u64 cfg_offset;
4402 u32 cfg_base_addr; 4390 u32 cfg_base_addr;
4403 u64 cfg_base_addr_index; 4391 u64 cfg_base_addr_index;
4404 void __iomem *vaddr; 4392 void __iomem *vaddr;
4405 unsigned long paddr; 4393 unsigned long paddr;
4406 u32 misc_fw_support, active_transport; 4394 u32 misc_fw_support, active_transport;
4407 int rc, i; 4395 int rc;
4408 CfgTable_struct __iomem *cfgtable; 4396 CfgTable_struct __iomem *cfgtable;
4409 bool use_doorbell; 4397 bool use_doorbell;
4410 u32 board_id; 4398 u32 board_id;
4399 u16 command_register;
4411 4400
4412 /* For controllers as old a the p600, this is very nearly 4401 /* For controllers as old a the p600, this is very nearly
4413 * the same thing as 4402 * the same thing as
@@ -4417,14 +4406,6 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
4417 * pci_set_power_state(pci_dev, PCI_D0); 4406 * pci_set_power_state(pci_dev, PCI_D0);
4418 * pci_restore_state(pci_dev); 4407 * pci_restore_state(pci_dev);
4419 * 4408 *
4420 * but we can't use these nice canned kernel routines on
4421 * kexec, because they also check the MSI/MSI-X state in PCI
4422 * configuration space and do the wrong thing when it is
4423 * set/cleared. Also, the pci_save/restore_state functions
4424 * violate the ordering requirements for restoring the
4425 * configuration space from the CCISS document (see the
4426 * comment below). So we roll our own ....
4427 *
4428 * For controllers newer than the P600, the pci power state 4409 * For controllers newer than the P600, the pci power state
4429 * method of resetting doesn't work so we have another way 4410 * method of resetting doesn't work so we have another way
4430 * using the doorbell register. 4411 * using the doorbell register.
@@ -4443,8 +4424,13 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
4443 return -ENODEV; 4424 return -ENODEV;
4444 } 4425 }
4445 4426
4446 for (i = 0; i < 32; i++) 4427 /* Save the PCI command register */
4447 pci_read_config_word(pdev, 2*i, &saved_config_space[i]); 4428 pci_read_config_word(pdev, 4, &command_register);
4429 /* Turn the board off. This is so that later pci_restore_state()
4430 * won't turn the board on before the rest of config space is ready.
4431 */
4432 pci_disable_device(pdev);
4433 pci_save_state(pdev);
4448 4434
4449 /* find the first memory BAR, so we can find the cfg table */ 4435 /* find the first memory BAR, so we can find the cfg table */
4450 rc = cciss_pci_find_memory_BAR(pdev, &paddr); 4436 rc = cciss_pci_find_memory_BAR(pdev, &paddr);
@@ -4479,26 +4465,32 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
4479 rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); 4465 rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
4480 if (rc) 4466 if (rc)
4481 goto unmap_cfgtable; 4467 goto unmap_cfgtable;
4482 4468 pci_restore_state(pdev);
4483 /* Restore the PCI configuration space. The Open CISS 4469 rc = pci_enable_device(pdev);
4484 * Specification says, "Restore the PCI Configuration 4470 if (rc) {
4485 * Registers, offsets 00h through 60h. It is important to 4471 dev_warn(&pdev->dev, "failed to enable device.\n");
4486 * restore the command register, 16-bits at offset 04h, 4472 goto unmap_cfgtable;
4487 * last. Do not restore the configuration status register,
4488 * 16-bits at offset 06h." Note that the offset is 2*i.
4489 */
4490 for (i = 0; i < 32; i++) {
4491 if (i == 2 || i == 3)
4492 continue;
4493 pci_write_config_word(pdev, 2*i, saved_config_space[i]);
4494 } 4473 }
4495 wmb(); 4474 pci_write_config_word(pdev, 4, command_register);
4496 pci_write_config_word(pdev, 4, saved_config_space[2]);
4497 4475
4498 /* Some devices (notably the HP Smart Array 5i Controller) 4476 /* Some devices (notably the HP Smart Array 5i Controller)
4499 need a little pause here */ 4477 need a little pause here */
4500 msleep(CCISS_POST_RESET_PAUSE_MSECS); 4478 msleep(CCISS_POST_RESET_PAUSE_MSECS);
4501 4479
4480 /* Wait for board to become not ready, then ready. */
4481 dev_info(&pdev->dev, "Waiting for board to become ready.\n");
4482 rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
4483 if (rc) /* Don't bail, might be E500, etc. which can't be reset */
4484 dev_warn(&pdev->dev,
4485 "failed waiting for board to become not ready\n");
4486 rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
4487 if (rc) {
4488 dev_warn(&pdev->dev,
4489 "failed waiting for board to become ready\n");
4490 goto unmap_cfgtable;
4491 }
4492 dev_info(&pdev->dev, "board ready.\n");
4493
4502 /* Controller should be in simple mode at this point. If it's not, 4494 /* Controller should be in simple mode at this point. If it's not,
4503 * It means we're on one of those controllers which doesn't support 4495 * It means we're on one of those controllers which doesn't support
4504 * the doorbell reset method and on which the PCI power management reset 4496 * the doorbell reset method and on which the PCI power management reset
@@ -4539,8 +4531,6 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
4539 return 0; /* just try to do the kdump anyhow. */ 4531 return 0; /* just try to do the kdump anyhow. */
4540 if (rc) 4532 if (rc)
4541 return -ENODEV; 4533 return -ENODEV;
4542 if (cciss_reset_msi(pdev))
4543 return -ENODEV;
4544 4534
4545 /* Now try to get the controller to respond to a no-op */ 4535 /* Now try to get the controller to respond to a no-op */
4546 for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) { 4536 for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
@@ -4936,7 +4926,8 @@ static void __exit cciss_cleanup(void)
4936 } 4926 }
4937 } 4927 }
4938 kthread_stop(cciss_scan_thread); 4928 kthread_stop(cciss_scan_thread);
4939 remove_proc_entry("driver/cciss", NULL); 4929 if (proc_cciss)
4930 remove_proc_entry("driver/cciss", NULL);
4940 bus_unregister(&cciss_bus_type); 4931 bus_unregister(&cciss_bus_type);
4941} 4932}
4942 4933
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index ae340ffc8f81..4b8933d778f1 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -200,10 +200,14 @@ struct ctlr_info
200 * the above. 200 * the above.
201 */ 201 */
202#define CCISS_BOARD_READY_WAIT_SECS (120) 202#define CCISS_BOARD_READY_WAIT_SECS (120)
203#define CCISS_BOARD_NOT_READY_WAIT_SECS (10)
203#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100) 204#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
204#define CCISS_BOARD_READY_ITERATIONS \ 205#define CCISS_BOARD_READY_ITERATIONS \
205 ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \ 206 ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
206 CCISS_BOARD_READY_POLL_INTERVAL_MSECS) 207 CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
208#define CCISS_BOARD_NOT_READY_ITERATIONS \
209 ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \
210 CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
207#define CCISS_POST_RESET_PAUSE_MSECS (3000) 211#define CCISS_POST_RESET_PAUSE_MSECS (3000)
208#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000) 212#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000)
209#define CCISS_POST_RESET_NOOP_RETRIES (12) 213#define CCISS_POST_RESET_NOOP_RETRIES (12)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index ac04ef97eac2..ba95cba192be 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -78,11 +78,10 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
78 init_completion(&md_io.event); 78 init_completion(&md_io.event);
79 md_io.error = 0; 79 md_io.error = 0;
80 80
81 if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags)) 81 if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
82 rw |= REQ_HARDBARRIER; 82 rw |= REQ_FUA;
83 rw |= REQ_UNPLUG | REQ_SYNC; 83 rw |= REQ_UNPLUG | REQ_SYNC;
84 84
85 retry:
86 bio = bio_alloc(GFP_NOIO, 1); 85 bio = bio_alloc(GFP_NOIO, 1);
87 bio->bi_bdev = bdev->md_bdev; 86 bio->bi_bdev = bdev->md_bdev;
88 bio->bi_sector = sector; 87 bio->bi_sector = sector;
@@ -100,17 +99,6 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
100 wait_for_completion(&md_io.event); 99 wait_for_completion(&md_io.event);
101 ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; 100 ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
102 101
103 /* check for unsupported barrier op.
104 * would rather check on EOPNOTSUPP, but that is not reliable.
105 * don't try again for ANY return value != 0 */
106 if (unlikely((bio->bi_rw & REQ_HARDBARRIER) && !ok)) {
107 /* Try again with no barrier */
108 dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
109 set_bit(MD_NO_BARRIER, &mdev->flags);
110 rw &= ~REQ_HARDBARRIER;
111 bio_put(bio);
112 goto retry;
113 }
114 out: 102 out:
115 bio_put(bio); 103 bio_put(bio);
116 return ok; 104 return ok;
@@ -284,18 +272,32 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
284 u32 xor_sum = 0; 272 u32 xor_sum = 0;
285 273
286 if (!get_ldev(mdev)) { 274 if (!get_ldev(mdev)) {
287 dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); 275 dev_err(DEV,
276 "disk is %s, cannot start al transaction (-%d +%d)\n",
277 drbd_disk_str(mdev->state.disk), evicted, new_enr);
288 complete(&((struct update_al_work *)w)->event); 278 complete(&((struct update_al_work *)w)->event);
289 return 1; 279 return 1;
290 } 280 }
291 /* do we have to do a bitmap write, first? 281 /* do we have to do a bitmap write, first?
292 * TODO reduce maximum latency: 282 * TODO reduce maximum latency:
293 * submit both bios, then wait for both, 283 * submit both bios, then wait for both,
294 * instead of doing two synchronous sector writes. */ 284 * instead of doing two synchronous sector writes.
285 * For now, we must not write the transaction,
286 * if we cannot write out the bitmap of the evicted extent. */
295 if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) 287 if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
296 drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); 288 drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
297 289
298 mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */ 290 /* The bitmap write may have failed, causing a state change. */
291 if (mdev->state.disk < D_INCONSISTENT) {
292 dev_err(DEV,
293 "disk is %s, cannot write al transaction (-%d +%d)\n",
294 drbd_disk_str(mdev->state.disk), evicted, new_enr);
295 complete(&((struct update_al_work *)w)->event);
296 put_ldev(mdev);
297 return 1;
298 }
299
300 mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
299 buffer = (struct al_transaction *)page_address(mdev->md_io_page); 301 buffer = (struct al_transaction *)page_address(mdev->md_io_page);
300 302
301 buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); 303 buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
@@ -739,7 +741,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
739 unsigned int enr; 741 unsigned int enr;
740 unsigned long add = 0; 742 unsigned long add = 0;
741 char ppb[10]; 743 char ppb[10];
742 int i; 744 int i, tmp;
743 745
744 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); 746 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
745 747
@@ -747,7 +749,9 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
747 enr = lc_element_by_index(mdev->act_log, i)->lc_number; 749 enr = lc_element_by_index(mdev->act_log, i)->lc_number;
748 if (enr == LC_FREE) 750 if (enr == LC_FREE)
749 continue; 751 continue;
750 add += drbd_bm_ALe_set_all(mdev, enr); 752 tmp = drbd_bm_ALe_set_all(mdev, enr);
753 dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
754 add += tmp;
751 } 755 }
752 756
753 lc_unlock(mdev->act_log); 757 lc_unlock(mdev->act_log);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9bdcf4393c0a..1ea1a34e78b2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -114,11 +114,11 @@ struct drbd_conf;
114#define D_ASSERT(exp) if (!(exp)) \ 114#define D_ASSERT(exp) if (!(exp)) \
115 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) 115 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
116 116
117#define ERR_IF(exp) if (({ \ 117#define ERR_IF(exp) if (({ \
118 int _b = (exp) != 0; \ 118 int _b = (exp) != 0; \
119 if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ 119 if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \
120 __func__, #exp, __FILE__, __LINE__); \ 120 __func__, #exp, __FILE__, __LINE__); \
121 _b; \ 121 _b; \
122 })) 122 }))
123 123
124/* Defines to control fault insertion */ 124/* Defines to control fault insertion */
@@ -749,17 +749,12 @@ struct drbd_epoch {
749 749
750/* drbd_epoch flag bits */ 750/* drbd_epoch flag bits */
751enum { 751enum {
752 DE_BARRIER_IN_NEXT_EPOCH_ISSUED,
753 DE_BARRIER_IN_NEXT_EPOCH_DONE,
754 DE_CONTAINS_A_BARRIER,
755 DE_HAVE_BARRIER_NUMBER, 752 DE_HAVE_BARRIER_NUMBER,
756 DE_IS_FINISHING,
757}; 753};
758 754
759enum epoch_event { 755enum epoch_event {
760 EV_PUT, 756 EV_PUT,
761 EV_GOT_BARRIER_NR, 757 EV_GOT_BARRIER_NR,
762 EV_BARRIER_DONE,
763 EV_BECAME_LAST, 758 EV_BECAME_LAST,
764 EV_CLEANUP = 32, /* used as flag */ 759 EV_CLEANUP = 32, /* used as flag */
765}; 760};
@@ -801,11 +796,6 @@ enum {
801 __EE_CALL_AL_COMPLETE_IO, 796 __EE_CALL_AL_COMPLETE_IO,
802 __EE_MAY_SET_IN_SYNC, 797 __EE_MAY_SET_IN_SYNC,
803 798
804 /* This epoch entry closes an epoch using a barrier.
805 * On sucessful completion, the epoch is released,
806 * and the P_BARRIER_ACK send. */
807 __EE_IS_BARRIER,
808
809 /* In case a barrier failed, 799 /* In case a barrier failed,
810 * we need to resubmit without the barrier flag. */ 800 * we need to resubmit without the barrier flag. */
811 __EE_RESUBMITTED, 801 __EE_RESUBMITTED,
@@ -820,7 +810,6 @@ enum {
820}; 810};
821#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) 811#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
822#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) 812#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
823#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
824#define EE_RESUBMITTED (1<<__EE_RESUBMITTED) 813#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
825#define EE_WAS_ERROR (1<<__EE_WAS_ERROR) 814#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
826#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) 815#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
@@ -843,16 +832,15 @@ enum {
843 * Gets cleared when the state.conn 832 * Gets cleared when the state.conn
844 * goes into C_CONNECTED state. */ 833 * goes into C_CONNECTED state. */
845 WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ 834 WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */
846 NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */
847 CONSIDER_RESYNC, 835 CONSIDER_RESYNC,
848 836
849 MD_NO_BARRIER, /* meta data device does not support barriers, 837 MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
850 so don't even try */
851 SUSPEND_IO, /* suspend application io */ 838 SUSPEND_IO, /* suspend application io */
852 BITMAP_IO, /* suspend application io; 839 BITMAP_IO, /* suspend application io;
853 once no more io in flight, start bitmap io */ 840 once no more io in flight, start bitmap io */
854 BITMAP_IO_QUEUED, /* Started bitmap IO */ 841 BITMAP_IO_QUEUED, /* Started bitmap IO */
855 GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */ 842 GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
843 WAS_IO_ERROR, /* Local disk failed returned IO error */
856 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ 844 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
857 NET_CONGESTED, /* The data socket is congested */ 845 NET_CONGESTED, /* The data socket is congested */
858 846
@@ -947,7 +935,6 @@ enum write_ordering_e {
947 WO_none, 935 WO_none,
948 WO_drain_io, 936 WO_drain_io,
949 WO_bdev_flush, 937 WO_bdev_flush,
950 WO_bio_barrier
951}; 938};
952 939
953struct fifo_buffer { 940struct fifo_buffer {
@@ -1281,6 +1268,7 @@ extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1281extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); 1268extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1282extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); 1269extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
1283extern void drbd_go_diskless(struct drbd_conf *mdev); 1270extern void drbd_go_diskless(struct drbd_conf *mdev);
1271extern void drbd_ldev_destroy(struct drbd_conf *mdev);
1284 1272
1285 1273
1286/* Meta data layout 1274/* Meta data layout
@@ -1798,17 +1786,17 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
1798 case EP_PASS_ON: 1786 case EP_PASS_ON:
1799 if (!forcedetach) { 1787 if (!forcedetach) {
1800 if (__ratelimit(&drbd_ratelimit_state)) 1788 if (__ratelimit(&drbd_ratelimit_state))
1801 dev_err(DEV, "Local IO failed in %s." 1789 dev_err(DEV, "Local IO failed in %s.\n", where);
1802 "Passing error on...\n", where);
1803 break; 1790 break;
1804 } 1791 }
1805 /* NOTE fall through to detach case if forcedetach set */ 1792 /* NOTE fall through to detach case if forcedetach set */
1806 case EP_DETACH: 1793 case EP_DETACH:
1807 case EP_CALL_HELPER: 1794 case EP_CALL_HELPER:
1795 set_bit(WAS_IO_ERROR, &mdev->flags);
1808 if (mdev->state.disk > D_FAILED) { 1796 if (mdev->state.disk > D_FAILED) {
1809 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); 1797 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
1810 dev_err(DEV, "Local IO failed in %s." 1798 dev_err(DEV,
1811 "Detaching...\n", where); 1799 "Local IO failed in %s. Detaching...\n", where);
1812 } 1800 }
1813 break; 1801 break;
1814 } 1802 }
@@ -1874,7 +1862,7 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
1874static inline sector_t drbd_get_capacity(struct block_device *bdev) 1862static inline sector_t drbd_get_capacity(struct block_device *bdev)
1875{ 1863{
1876 /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ 1864 /* return bdev ? get_capacity(bdev->bd_disk) : 0; */
1877 return bdev ? bdev->bd_inode->i_size >> 9 : 0; 1865 return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0;
1878} 1866}
1879 1867
1880/** 1868/**
@@ -2127,7 +2115,11 @@ static inline void put_ldev(struct drbd_conf *mdev)
2127 __release(local); 2115 __release(local);
2128 D_ASSERT(i >= 0); 2116 D_ASSERT(i >= 0);
2129 if (i == 0) { 2117 if (i == 0) {
2118 if (mdev->state.disk == D_DISKLESS)
2119 /* even internal references gone, safe to destroy */
2120 drbd_ldev_destroy(mdev);
2130 if (mdev->state.disk == D_FAILED) 2121 if (mdev->state.disk == D_FAILED)
2122 /* all application IO references gone. */
2131 drbd_go_diskless(mdev); 2123 drbd_go_diskless(mdev);
2132 wake_up(&mdev->misc_wait); 2124 wake_up(&mdev->misc_wait);
2133 } 2125 }
@@ -2138,6 +2130,10 @@ static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_stat
2138{ 2130{
2139 int io_allowed; 2131 int io_allowed;
2140 2132
2133 /* never get a reference while D_DISKLESS */
2134 if (mdev->state.disk == D_DISKLESS)
2135 return 0;
2136
2141 atomic_inc(&mdev->local_cnt); 2137 atomic_inc(&mdev->local_cnt);
2142 io_allowed = (mdev->state.disk >= mins); 2138 io_allowed = (mdev->state.disk >= mins);
2143 if (!io_allowed) 2139 if (!io_allowed)
@@ -2406,12 +2402,12 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
2406{ 2402{
2407 int r; 2403 int r;
2408 2404
2409 if (test_bit(MD_NO_BARRIER, &mdev->flags)) 2405 if (test_bit(MD_NO_FUA, &mdev->flags))
2410 return; 2406 return;
2411 2407
2412 r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); 2408 r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
2413 if (r) { 2409 if (r) {
2414 set_bit(MD_NO_BARRIER, &mdev->flags); 2410 set_bit(MD_NO_FUA, &mdev->flags);
2415 dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); 2411 dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
2416 } 2412 }
2417} 2413}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 25c7a73c5062..6be5401d0e88 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -835,6 +835,15 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
835 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) 835 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
836 ns.conn = os.conn; 836 ns.conn = os.conn;
837 837
838 /* we cannot fail (again) if we already detached */
839 if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
840 ns.disk = D_DISKLESS;
841
842 /* if we are only D_ATTACHING yet,
843 * we can (and should) go directly to D_DISKLESS. */
844 if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
845 ns.disk = D_DISKLESS;
846
838 /* After C_DISCONNECTING only C_STANDALONE may follow */ 847 /* After C_DISCONNECTING only C_STANDALONE may follow */
839 if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) 848 if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
840 ns.conn = os.conn; 849 ns.conn = os.conn;
@@ -1056,7 +1065,15 @@ int __drbd_set_state(struct drbd_conf *mdev,
1056 !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) 1065 !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
1057 set_bit(DEVICE_DYING, &mdev->flags); 1066 set_bit(DEVICE_DYING, &mdev->flags);
1058 1067
1059 mdev->state.i = ns.i; 1068 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
1069 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
1070 * drbd_ldev_destroy() won't happen before our corresponding
1071 * after_state_ch works run, where we put_ldev again. */
1072 if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
1073 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
1074 atomic_inc(&mdev->local_cnt);
1075
1076 mdev->state = ns;
1060 wake_up(&mdev->misc_wait); 1077 wake_up(&mdev->misc_wait);
1061 wake_up(&mdev->state_wait); 1078 wake_up(&mdev->state_wait);
1062 1079
@@ -1268,7 +1285,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1268 if (test_bit(NEW_CUR_UUID, &mdev->flags)) { 1285 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1269 drbd_uuid_new_current(mdev); 1286 drbd_uuid_new_current(mdev);
1270 clear_bit(NEW_CUR_UUID, &mdev->flags); 1287 clear_bit(NEW_CUR_UUID, &mdev->flags);
1271 drbd_md_sync(mdev);
1272 } 1288 }
1273 spin_lock_irq(&mdev->req_lock); 1289 spin_lock_irq(&mdev->req_lock);
1274 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); 1290 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
@@ -1365,63 +1381,64 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1365 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) 1381 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1366 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); 1382 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
1367 1383
1368 /* first half of local IO error */ 1384 /* first half of local IO error, failure to attach,
1369 if (os.disk > D_FAILED && ns.disk == D_FAILED) { 1385 * or administrative detach */
1370 enum drbd_io_error_p eh = EP_PASS_ON; 1386 if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1387 enum drbd_io_error_p eh;
1388 int was_io_error;
1389 /* corresponding get_ldev was in __drbd_set_state, to serialize
1390 * our cleanup here with the transition to D_DISKLESS,
1391 * so it is safe to dreference ldev here. */
1392 eh = mdev->ldev->dc.on_io_error;
1393 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
1394
1395 /* current state still has to be D_FAILED,
1396 * there is only one way out: to D_DISKLESS,
1397 * and that may only happen after our put_ldev below. */
1398 if (mdev->state.disk != D_FAILED)
1399 dev_err(DEV,
1400 "ASSERT FAILED: disk is %s during detach\n",
1401 drbd_disk_str(mdev->state.disk));
1371 1402
1372 if (drbd_send_state(mdev)) 1403 if (drbd_send_state(mdev))
1373 dev_warn(DEV, "Notified peer that my disk is broken.\n"); 1404 dev_warn(DEV, "Notified peer that I am detaching my disk\n");
1374 else 1405 else
1375 dev_err(DEV, "Sending state for drbd_io_error() failed\n"); 1406 dev_err(DEV, "Sending state for detaching disk failed\n");
1376 1407
1377 drbd_rs_cancel_all(mdev); 1408 drbd_rs_cancel_all(mdev);
1378 1409
1379 if (get_ldev_if_state(mdev, D_FAILED)) { 1410 /* In case we want to get something to stable storage still,
1380 eh = mdev->ldev->dc.on_io_error; 1411 * this may be the last chance.
1381 put_ldev(mdev); 1412 * Following put_ldev may transition to D_DISKLESS. */
1382 } 1413 drbd_md_sync(mdev);
1383 if (eh == EP_CALL_HELPER) 1414 put_ldev(mdev);
1415
1416 if (was_io_error && eh == EP_CALL_HELPER)
1384 drbd_khelper(mdev, "local-io-error"); 1417 drbd_khelper(mdev, "local-io-error");
1385 } 1418 }
1386 1419
1420 /* second half of local IO error, failure to attach,
1421 * or administrative detach,
1422 * after local_cnt references have reached zero again */
1423 if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1424 /* We must still be diskless,
1425 * re-attach has to be serialized with this! */
1426 if (mdev->state.disk != D_DISKLESS)
1427 dev_err(DEV,
1428 "ASSERT FAILED: disk is %s while going diskless\n",
1429 drbd_disk_str(mdev->state.disk));
1387 1430
1388 /* second half of local IO error handling, 1431 mdev->rs_total = 0;
1389 * after local_cnt references have reached zero: */ 1432 mdev->rs_failed = 0;
1390 if (os.disk == D_FAILED && ns.disk == D_DISKLESS) { 1433 atomic_set(&mdev->rs_pending_cnt, 0);
1391 mdev->rs_total = 0;
1392 mdev->rs_failed = 0;
1393 atomic_set(&mdev->rs_pending_cnt, 0);
1394 }
1395
1396 if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
1397 /* We must still be diskless,
1398 * re-attach has to be serialized with this! */
1399 if (mdev->state.disk != D_DISKLESS)
1400 dev_err(DEV,
1401 "ASSERT FAILED: disk is %s while going diskless\n",
1402 drbd_disk_str(mdev->state.disk));
1403 1434
1404 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state
1405 * will inc/dec it frequently. Since we became D_DISKLESS, no
1406 * one has touched the protected members anymore, though, so we
1407 * are safe to free them here. */
1408 if (drbd_send_state(mdev)) 1435 if (drbd_send_state(mdev))
1409 dev_warn(DEV, "Notified peer that I detached my disk.\n"); 1436 dev_warn(DEV, "Notified peer that I'm now diskless.\n");
1410 else 1437 else
1411 dev_err(DEV, "Sending state for detach failed\n"); 1438 dev_err(DEV, "Sending state for being diskless failed\n");
1412 1439 /* corresponding get_ldev in __drbd_set_state
1413 lc_destroy(mdev->resync); 1440 * this may finaly trigger drbd_ldev_destroy. */
1414 mdev->resync = NULL; 1441 put_ldev(mdev);
1415 lc_destroy(mdev->act_log);
1416 mdev->act_log = NULL;
1417 __no_warn(local,
1418 drbd_free_bc(mdev->ldev);
1419 mdev->ldev = NULL;);
1420
1421 if (mdev->md_io_tmpp) {
1422 __free_page(mdev->md_io_tmpp);
1423 mdev->md_io_tmpp = NULL;
1424 }
1425 } 1442 }
1426 1443
1427 /* Disks got bigger while they were detached */ 1444 /* Disks got bigger while they were detached */
@@ -2772,11 +2789,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2772 2789
2773 drbd_set_defaults(mdev); 2790 drbd_set_defaults(mdev);
2774 2791
2775 /* for now, we do NOT yet support it,
2776 * even though we start some framework
2777 * to eventually support barriers */
2778 set_bit(NO_BARRIER_SUPP, &mdev->flags);
2779
2780 atomic_set(&mdev->ap_bio_cnt, 0); 2792 atomic_set(&mdev->ap_bio_cnt, 0);
2781 atomic_set(&mdev->ap_pending_cnt, 0); 2793 atomic_set(&mdev->ap_pending_cnt, 0);
2782 atomic_set(&mdev->rs_pending_cnt, 0); 2794 atomic_set(&mdev->rs_pending_cnt, 0);
@@ -2842,7 +2854,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2842 drbd_thread_init(mdev, &mdev->asender, drbd_asender); 2854 drbd_thread_init(mdev, &mdev->asender, drbd_asender);
2843 2855
2844 mdev->agreed_pro_version = PRO_VERSION_MAX; 2856 mdev->agreed_pro_version = PRO_VERSION_MAX;
2845 mdev->write_ordering = WO_bio_barrier; 2857 mdev->write_ordering = WO_bdev_flush;
2846 mdev->resync_wenr = LC_FREE; 2858 mdev->resync_wenr = LC_FREE;
2847} 2859}
2848 2860
@@ -2899,7 +2911,6 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2899 D_ASSERT(list_empty(&mdev->resync_work.list)); 2911 D_ASSERT(list_empty(&mdev->resync_work.list));
2900 D_ASSERT(list_empty(&mdev->unplug_work.list)); 2912 D_ASSERT(list_empty(&mdev->unplug_work.list));
2901 D_ASSERT(list_empty(&mdev->go_diskless.list)); 2913 D_ASSERT(list_empty(&mdev->go_diskless.list));
2902
2903} 2914}
2904 2915
2905 2916
@@ -3660,6 +3671,8 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
3660 3671
3661 get_random_bytes(&val, sizeof(u64)); 3672 get_random_bytes(&val, sizeof(u64));
3662 _drbd_uuid_set(mdev, UI_CURRENT, val); 3673 _drbd_uuid_set(mdev, UI_CURRENT, val);
3674 /* get it to stable storage _now_ */
3675 drbd_md_sync(mdev);
3663} 3676}
3664 3677
3665void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) 3678void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
@@ -3756,19 +3769,31 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3756 return 1; 3769 return 1;
3757} 3770}
3758 3771
3772void drbd_ldev_destroy(struct drbd_conf *mdev)
3773{
3774 lc_destroy(mdev->resync);
3775 mdev->resync = NULL;
3776 lc_destroy(mdev->act_log);
3777 mdev->act_log = NULL;
3778 __no_warn(local,
3779 drbd_free_bc(mdev->ldev);
3780 mdev->ldev = NULL;);
3781
3782 if (mdev->md_io_tmpp) {
3783 __free_page(mdev->md_io_tmpp);
3784 mdev->md_io_tmpp = NULL;
3785 }
3786 clear_bit(GO_DISKLESS, &mdev->flags);
3787}
3788
3759static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused) 3789static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3760{ 3790{
3761 D_ASSERT(mdev->state.disk == D_FAILED); 3791 D_ASSERT(mdev->state.disk == D_FAILED);
3762 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will 3792 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
3763 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch 3793 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
3764 * the protected members anymore, though, so in the after_state_ch work 3794 * the protected members anymore, though, so once put_ldev reaches zero
3765 * it will be safe to free them. */ 3795 * again, it will be safe to free them. */
3766 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 3796 drbd_force_state(mdev, NS(disk, D_DISKLESS));
3767 /* We need to wait for return of references checked out while we still
3768 * have been D_FAILED, though (drbd_md_sync, bitmap io). */
3769 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
3770
3771 clear_bit(GO_DISKLESS, &mdev->flags);
3772 return 1; 3797 return 1;
3773} 3798}
3774 3799
@@ -3777,9 +3802,6 @@ void drbd_go_diskless(struct drbd_conf *mdev)
3777 D_ASSERT(mdev->state.disk == D_FAILED); 3802 D_ASSERT(mdev->state.disk == D_FAILED);
3778 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) 3803 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
3779 drbd_queue_work(&mdev->data.work, &mdev->go_diskless); 3804 drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
3780 /* don't drbd_queue_work_front,
3781 * we need to serialize with the after_state_ch work
3782 * of the -> D_FAILED transition. */
3783} 3805}
3784 3806
3785/** 3807/**
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 87925e97e613..29e5c70e4e26 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -870,6 +870,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
870 retcode = ERR_DISK_CONFIGURED; 870 retcode = ERR_DISK_CONFIGURED;
871 goto fail; 871 goto fail;
872 } 872 }
873 /* It may just now have detached because of IO error. Make sure
874 * drbd_ldev_destroy is done already, we may end up here very fast,
875 * e.g. if someone calls attach from the on-io-error handler,
876 * to realize a "hot spare" feature (not that I'd recommend that) */
877 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
873 878
874 /* allocation not in the IO path, cqueue thread context */ 879 /* allocation not in the IO path, cqueue thread context */
875 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); 880 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
@@ -1098,9 +1103,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1098 /* Reset the "barriers don't work" bits here, then force meta data to 1103 /* Reset the "barriers don't work" bits here, then force meta data to
1099 * be written, to ensure we determine if barriers are supported. */ 1104 * be written, to ensure we determine if barriers are supported. */
1100 if (nbc->dc.no_md_flush) 1105 if (nbc->dc.no_md_flush)
1101 set_bit(MD_NO_BARRIER, &mdev->flags); 1106 set_bit(MD_NO_FUA, &mdev->flags);
1102 else 1107 else
1103 clear_bit(MD_NO_BARRIER, &mdev->flags); 1108 clear_bit(MD_NO_FUA, &mdev->flags);
1104 1109
1105 /* Point of no return reached. 1110 /* Point of no return reached.
1106 * Devices and memory are no longer released by error cleanup below. 1111 * Devices and memory are no longer released by error cleanup below.
@@ -1112,8 +1117,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1112 nbc = NULL; 1117 nbc = NULL;
1113 resync_lru = NULL; 1118 resync_lru = NULL;
1114 1119
1115 mdev->write_ordering = WO_bio_barrier; 1120 mdev->write_ordering = WO_bdev_flush;
1116 drbd_bump_write_ordering(mdev, WO_bio_barrier); 1121 drbd_bump_write_ordering(mdev, WO_bdev_flush);
1117 1122
1118 if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) 1123 if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
1119 set_bit(CRASHED_PRIMARY, &mdev->flags); 1124 set_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -1262,7 +1267,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1262 force_diskless_dec: 1267 force_diskless_dec:
1263 put_ldev(mdev); 1268 put_ldev(mdev);
1264 force_diskless: 1269 force_diskless:
1265 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 1270 drbd_force_state(mdev, NS(disk, D_FAILED));
1266 drbd_md_sync(mdev); 1271 drbd_md_sync(mdev);
1267 release_bdev2_fail: 1272 release_bdev2_fail:
1268 if (nbc) 1273 if (nbc)
@@ -1285,10 +1290,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1285 return 0; 1290 return 0;
1286} 1291}
1287 1292
1293/* Detaching the disk is a process in multiple stages. First we need to lock
1294 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1295 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1296 * internal references as well.
1297 * Only then we have finally detached. */
1288static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1298static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1289 struct drbd_nl_cfg_reply *reply) 1299 struct drbd_nl_cfg_reply *reply)
1290{ 1300{
1301 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1291 reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); 1302 reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
1303 if (mdev->state.disk == D_DISKLESS)
1304 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1305 drbd_resume_io(mdev);
1292 return 0; 1306 return 0;
1293} 1307}
1294 1308
@@ -1953,7 +1967,6 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1953 if (test_bit(NEW_CUR_UUID, &mdev->flags)) { 1967 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1954 drbd_uuid_new_current(mdev); 1968 drbd_uuid_new_current(mdev);
1955 clear_bit(NEW_CUR_UUID, &mdev->flags); 1969 clear_bit(NEW_CUR_UUID, &mdev->flags);
1956 drbd_md_sync(mdev);
1957 } 1970 }
1958 drbd_suspend_io(mdev); 1971 drbd_suspend_io(mdev);
1959 reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); 1972 reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index ad325c5d0ce1..7e6ac307e2de 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -158,7 +158,6 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
158 [WO_none] = 'n', 158 [WO_none] = 'n',
159 [WO_drain_io] = 'd', 159 [WO_drain_io] = 'd',
160 [WO_bdev_flush] = 'f', 160 [WO_bdev_flush] = 'f',
161 [WO_bio_barrier] = 'b',
162 }; 161 };
163 162
164 seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n", 163 seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index efd6169acf2f..d299fe9e78c8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -49,11 +49,6 @@
49 49
50#include "drbd_vli.h" 50#include "drbd_vli.h"
51 51
52struct flush_work {
53 struct drbd_work w;
54 struct drbd_epoch *epoch;
55};
56
57enum finish_epoch { 52enum finish_epoch {
58 FE_STILL_LIVE, 53 FE_STILL_LIVE,
59 FE_DESTROYED, 54 FE_DESTROYED,
@@ -66,16 +61,6 @@ static int drbd_do_auth(struct drbd_conf *mdev);
66static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); 61static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
67static int e_end_block(struct drbd_conf *, struct drbd_work *, int); 62static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
68 63
69static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
70{
71 struct drbd_epoch *prev;
72 spin_lock(&mdev->epoch_lock);
73 prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
74 if (prev == epoch || prev == mdev->current_epoch)
75 prev = NULL;
76 spin_unlock(&mdev->epoch_lock);
77 return prev;
78}
79 64
80#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 65#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
81 66
@@ -981,7 +966,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
981 return TRUE; 966 return TRUE;
982} 967}
983 968
984static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) 969static void drbd_flush(struct drbd_conf *mdev)
985{ 970{
986 int rv; 971 int rv;
987 972
@@ -997,24 +982,6 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
997 } 982 }
998 put_ldev(mdev); 983 put_ldev(mdev);
999 } 984 }
1000
1001 return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
1002}
1003
1004static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1005{
1006 struct flush_work *fw = (struct flush_work *)w;
1007 struct drbd_epoch *epoch = fw->epoch;
1008
1009 kfree(w);
1010
1011 if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
1012 drbd_flush_after_epoch(mdev, epoch);
1013
1014 drbd_may_finish_epoch(mdev, epoch, EV_PUT |
1015 (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
1016
1017 return 1;
1018} 985}
1019 986
1020/** 987/**
@@ -1027,15 +994,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1027 struct drbd_epoch *epoch, 994 struct drbd_epoch *epoch,
1028 enum epoch_event ev) 995 enum epoch_event ev)
1029{ 996{
1030 int finish, epoch_size; 997 int epoch_size;
1031 struct drbd_epoch *next_epoch; 998 struct drbd_epoch *next_epoch;
1032 int schedule_flush = 0;
1033 enum finish_epoch rv = FE_STILL_LIVE; 999 enum finish_epoch rv = FE_STILL_LIVE;
1034 1000
1035 spin_lock(&mdev->epoch_lock); 1001 spin_lock(&mdev->epoch_lock);
1036 do { 1002 do {
1037 next_epoch = NULL; 1003 next_epoch = NULL;
1038 finish = 0;
1039 1004
1040 epoch_size = atomic_read(&epoch->epoch_size); 1005 epoch_size = atomic_read(&epoch->epoch_size);
1041 1006
@@ -1045,16 +1010,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1045 break; 1010 break;
1046 case EV_GOT_BARRIER_NR: 1011 case EV_GOT_BARRIER_NR:
1047 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); 1012 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
1048
1049 /* Special case: If we just switched from WO_bio_barrier to
1050 WO_bdev_flush we should not finish the current epoch */
1051 if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
1052 mdev->write_ordering != WO_bio_barrier &&
1053 epoch == mdev->current_epoch)
1054 clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
1055 break;
1056 case EV_BARRIER_DONE:
1057 set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
1058 break; 1013 break;
1059 case EV_BECAME_LAST: 1014 case EV_BECAME_LAST:
1060 /* nothing to do*/ 1015 /* nothing to do*/
@@ -1063,23 +1018,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1063 1018
1064 if (epoch_size != 0 && 1019 if (epoch_size != 0 &&
1065 atomic_read(&epoch->active) == 0 && 1020 atomic_read(&epoch->active) == 0 &&
1066 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && 1021 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
1067 epoch->list.prev == &mdev->current_epoch->list &&
1068 !test_bit(DE_IS_FINISHING, &epoch->flags)) {
1069 /* Nearly all conditions are met to finish that epoch... */
1070 if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
1071 mdev->write_ordering == WO_none ||
1072 (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
1073 ev & EV_CLEANUP) {
1074 finish = 1;
1075 set_bit(DE_IS_FINISHING, &epoch->flags);
1076 } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
1077 mdev->write_ordering == WO_bio_barrier) {
1078 atomic_inc(&epoch->active);
1079 schedule_flush = 1;
1080 }
1081 }
1082 if (finish) {
1083 if (!(ev & EV_CLEANUP)) { 1022 if (!(ev & EV_CLEANUP)) {
1084 spin_unlock(&mdev->epoch_lock); 1023 spin_unlock(&mdev->epoch_lock);
1085 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); 1024 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
@@ -1102,6 +1041,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1102 /* atomic_set(&epoch->active, 0); is already zero */ 1041 /* atomic_set(&epoch->active, 0); is already zero */
1103 if (rv == FE_STILL_LIVE) 1042 if (rv == FE_STILL_LIVE)
1104 rv = FE_RECYCLED; 1043 rv = FE_RECYCLED;
1044 wake_up(&mdev->ee_wait);
1105 } 1045 }
1106 } 1046 }
1107 1047
@@ -1113,22 +1053,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1113 1053
1114 spin_unlock(&mdev->epoch_lock); 1054 spin_unlock(&mdev->epoch_lock);
1115 1055
1116 if (schedule_flush) {
1117 struct flush_work *fw;
1118 fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
1119 if (fw) {
1120 fw->w.cb = w_flush;
1121 fw->epoch = epoch;
1122 drbd_queue_work(&mdev->data.work, &fw->w);
1123 } else {
1124 dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
1125 set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
1126 /* That is not a recursion, only one level */
1127 drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
1128 drbd_may_finish_epoch(mdev, epoch, EV_PUT);
1129 }
1130 }
1131
1132 return rv; 1056 return rv;
1133} 1057}
1134 1058
@@ -1144,19 +1068,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
1144 [WO_none] = "none", 1068 [WO_none] = "none",
1145 [WO_drain_io] = "drain", 1069 [WO_drain_io] = "drain",
1146 [WO_bdev_flush] = "flush", 1070 [WO_bdev_flush] = "flush",
1147 [WO_bio_barrier] = "barrier",
1148 }; 1071 };
1149 1072
1150 pwo = mdev->write_ordering; 1073 pwo = mdev->write_ordering;
1151 wo = min(pwo, wo); 1074 wo = min(pwo, wo);
1152 if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
1153 wo = WO_bdev_flush;
1154 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) 1075 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1155 wo = WO_drain_io; 1076 wo = WO_drain_io;
1156 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) 1077 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1157 wo = WO_none; 1078 wo = WO_none;
1158 mdev->write_ordering = wo; 1079 mdev->write_ordering = wo;
1159 if (pwo != mdev->write_ordering || wo == WO_bio_barrier) 1080 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
1160 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); 1081 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1161} 1082}
1162 1083
@@ -1192,7 +1113,7 @@ next_bio:
1192 bio->bi_sector = sector; 1113 bio->bi_sector = sector;
1193 bio->bi_bdev = mdev->ldev->backing_bdev; 1114 bio->bi_bdev = mdev->ldev->backing_bdev;
1194 /* we special case some flags in the multi-bio case, see below 1115 /* we special case some flags in the multi-bio case, see below
1195 * (REQ_UNPLUG, REQ_HARDBARRIER) */ 1116 * (REQ_UNPLUG) */
1196 bio->bi_rw = rw; 1117 bio->bi_rw = rw;
1197 bio->bi_private = e; 1118 bio->bi_private = e;
1198 bio->bi_end_io = drbd_endio_sec; 1119 bio->bi_end_io = drbd_endio_sec;
@@ -1226,11 +1147,6 @@ next_bio:
1226 bio->bi_rw &= ~REQ_UNPLUG; 1147 bio->bi_rw &= ~REQ_UNPLUG;
1227 1148
1228 drbd_generic_make_request(mdev, fault_type, bio); 1149 drbd_generic_make_request(mdev, fault_type, bio);
1229
1230 /* strip off REQ_HARDBARRIER,
1231 * unless it is the first or last bio */
1232 if (bios && bios->bi_next)
1233 bios->bi_rw &= ~REQ_HARDBARRIER;
1234 } while (bios); 1150 } while (bios);
1235 maybe_kick_lo(mdev); 1151 maybe_kick_lo(mdev);
1236 return 0; 1152 return 0;
@@ -1244,45 +1160,9 @@ fail:
1244 return -ENOMEM; 1160 return -ENOMEM;
1245} 1161}
1246 1162
1247/**
1248 * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
1249 * @mdev: DRBD device.
1250 * @w: work object.
1251 * @cancel: The connection will be closed anyways (unused in this callback)
1252 */
1253int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
1254{
1255 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1256 /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
1257 (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
1258 so that we can finish that epoch in drbd_may_finish_epoch().
1259 That is necessary if we already have a long chain of Epochs, before
1260 we realize that REQ_HARDBARRIER is actually not supported */
1261
1262 /* As long as the -ENOTSUPP on the barrier is reported immediately
1263 that will never trigger. If it is reported late, we will just
1264 print that warning and continue correctly for all future requests
1265 with WO_bdev_flush */
1266 if (previous_epoch(mdev, e->epoch))
1267 dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
1268
1269 /* we still have a local reference,
1270 * get_ldev was done in receive_Data. */
1271
1272 e->w.cb = e_end_block;
1273 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) {
1274 /* drbd_submit_ee fails for one reason only:
1275 * if was not able to allocate sufficient bios.
1276 * requeue, try again later. */
1277 e->w.cb = w_e_reissue;
1278 drbd_queue_work(&mdev->data.work, &e->w);
1279 }
1280 return 1;
1281}
1282
1283static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1163static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1284{ 1164{
1285 int rv, issue_flush; 1165 int rv;
1286 struct p_barrier *p = &mdev->data.rbuf.barrier; 1166 struct p_barrier *p = &mdev->data.rbuf.barrier;
1287 struct drbd_epoch *epoch; 1167 struct drbd_epoch *epoch;
1288 1168
@@ -1300,44 +1180,40 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1300 * Therefore we must send the barrier_ack after the barrier request was 1180 * Therefore we must send the barrier_ack after the barrier request was
1301 * completed. */ 1181 * completed. */
1302 switch (mdev->write_ordering) { 1182 switch (mdev->write_ordering) {
1303 case WO_bio_barrier:
1304 case WO_none: 1183 case WO_none:
1305 if (rv == FE_RECYCLED) 1184 if (rv == FE_RECYCLED)
1306 return TRUE; 1185 return TRUE;
1307 break; 1186
1187 /* receiver context, in the writeout path of the other node.
1188 * avoid potential distributed deadlock */
1189 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1190 if (epoch)
1191 break;
1192 else
1193 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1194 /* Fall through */
1308 1195
1309 case WO_bdev_flush: 1196 case WO_bdev_flush:
1310 case WO_drain_io: 1197 case WO_drain_io:
1311 if (rv == FE_STILL_LIVE) {
1312 set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
1313 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1314 rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
1315 }
1316 if (rv == FE_RECYCLED)
1317 return TRUE;
1318
1319 /* The asender will send all the ACKs and barrier ACKs out, since
1320 all EEs moved from the active_ee to the done_ee. We need to
1321 provide a new epoch object for the EEs that come in soon */
1322 break;
1323 }
1324
1325 /* receiver context, in the writeout path of the other node.
1326 * avoid potential distributed deadlock */
1327 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1328 if (!epoch) {
1329 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1330 issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
1331 drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 1198 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1332 if (issue_flush) { 1199 drbd_flush(mdev);
1333 rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); 1200
1334 if (rv == FE_RECYCLED) 1201 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1335 return TRUE; 1202 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1203 if (epoch)
1204 break;
1336 } 1205 }
1337 1206
1338 drbd_wait_ee_list_empty(mdev, &mdev->done_ee); 1207 epoch = mdev->current_epoch;
1208 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1209
1210 D_ASSERT(atomic_read(&epoch->active) == 0);
1211 D_ASSERT(epoch->flags == 0);
1339 1212
1340 return TRUE; 1213 return TRUE;
1214 default:
1215 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
1216 return FALSE;
1341 } 1217 }
1342 1218
1343 epoch->flags = 0; 1219 epoch->flags = 0;
@@ -1652,15 +1528,8 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1652{ 1528{
1653 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; 1529 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1654 sector_t sector = e->sector; 1530 sector_t sector = e->sector;
1655 struct drbd_epoch *epoch;
1656 int ok = 1, pcmd; 1531 int ok = 1, pcmd;
1657 1532
1658 if (e->flags & EE_IS_BARRIER) {
1659 epoch = previous_epoch(mdev, e->epoch);
1660 if (epoch)
1661 drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
1662 }
1663
1664 if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { 1533 if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
1665 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1534 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
1666 pcmd = (mdev->state.conn >= C_SYNC_SOURCE && 1535 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
@@ -1817,27 +1686,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1817 e->epoch = mdev->current_epoch; 1686 e->epoch = mdev->current_epoch;
1818 atomic_inc(&e->epoch->epoch_size); 1687 atomic_inc(&e->epoch->epoch_size);
1819 atomic_inc(&e->epoch->active); 1688 atomic_inc(&e->epoch->active);
1820
1821 if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
1822 struct drbd_epoch *epoch;
1823 /* Issue a barrier if we start a new epoch, and the previous epoch
1824 was not a epoch containing a single request which already was
1825 a Barrier. */
1826 epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
1827 if (epoch == e->epoch) {
1828 set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
1829 rw |= REQ_HARDBARRIER;
1830 e->flags |= EE_IS_BARRIER;
1831 } else {
1832 if (atomic_read(&epoch->epoch_size) > 1 ||
1833 !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
1834 set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
1835 set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
1836 rw |= REQ_HARDBARRIER;
1837 e->flags |= EE_IS_BARRIER;
1838 }
1839 }
1840 }
1841 spin_unlock(&mdev->epoch_lock); 1689 spin_unlock(&mdev->epoch_lock);
1842 1690
1843 dp_flags = be32_to_cpu(p->dp_flags); 1691 dp_flags = be32_to_cpu(p->dp_flags);
@@ -1995,10 +1843,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1995 break; 1843 break;
1996 } 1844 }
1997 1845
1998 if (mdev->state.pdsk == D_DISKLESS) { 1846 if (mdev->state.pdsk < D_INCONSISTENT) {
1999 /* In case we have the only disk of the cluster, */ 1847 /* In case we have the only disk of the cluster, */
2000 drbd_set_out_of_sync(mdev, e->sector, e->size); 1848 drbd_set_out_of_sync(mdev, e->sector, e->size);
2001 e->flags |= EE_CALL_AL_COMPLETE_IO; 1849 e->flags |= EE_CALL_AL_COMPLETE_IO;
1850 e->flags &= ~EE_MAY_SET_IN_SYNC;
2002 drbd_al_begin_io(mdev, e->sector); 1851 drbd_al_begin_io(mdev, e->sector);
2003 } 1852 }
2004 1853
@@ -3362,7 +3211,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3362 if (ns.conn == C_MASK) { 3211 if (ns.conn == C_MASK) {
3363 ns.conn = C_CONNECTED; 3212 ns.conn = C_CONNECTED;
3364 if (mdev->state.disk == D_NEGOTIATING) { 3213 if (mdev->state.disk == D_NEGOTIATING) {
3365 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 3214 drbd_force_state(mdev, NS(disk, D_FAILED));
3366 } else if (peer_state.disk == D_NEGOTIATING) { 3215 } else if (peer_state.disk == D_NEGOTIATING) {
3367 dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); 3216 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3368 peer_state.disk = D_DISKLESS; 3217 peer_state.disk = D_DISKLESS;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9e91a2545fc8..11a75d32a2e2 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -258,7 +258,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
258 if (!hlist_unhashed(&req->colision)) 258 if (!hlist_unhashed(&req->colision))
259 hlist_del(&req->colision); 259 hlist_del(&req->colision);
260 else 260 else
261 D_ASSERT((s & RQ_NET_MASK) == 0); 261 D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
262 262
263 /* for writes we need to do some extra housekeeping */ 263 /* for writes we need to do some extra housekeeping */
264 if (rw == WRITE) 264 if (rw == WRITE)
@@ -813,7 +813,8 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
813 mdev->state.conn >= C_CONNECTED)); 813 mdev->state.conn >= C_CONNECTED));
814 814
815 if (!(local || remote) && !is_susp(mdev->state)) { 815 if (!(local || remote) && !is_susp(mdev->state)) {
816 dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); 816 if (__ratelimit(&drbd_ratelimit_state))
817 dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
817 goto fail_free_complete; 818 goto fail_free_complete;
818 } 819 }
819 820
@@ -942,12 +943,21 @@ allocate_barrier:
942 if (local) { 943 if (local) {
943 req->private_bio->bi_bdev = mdev->ldev->backing_bdev; 944 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
944 945
945 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR 946 /* State may have changed since we grabbed our reference on the
946 : rw == READ ? DRBD_FAULT_DT_RD 947 * mdev->ldev member. Double check, and short-circuit to endio.
947 : DRBD_FAULT_DT_RA)) 948 * In case the last activity log transaction failed to get on
949 * stable storage, and this is a WRITE, we may not even submit
950 * this bio. */
951 if (get_ldev(mdev)) {
952 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
953 : rw == READ ? DRBD_FAULT_DT_RD
954 : DRBD_FAULT_DT_RA))
955 bio_endio(req->private_bio, -EIO);
956 else
957 generic_make_request(req->private_bio);
958 put_ldev(mdev);
959 } else
948 bio_endio(req->private_bio, -EIO); 960 bio_endio(req->private_bio, -EIO);
949 else
950 generic_make_request(req->private_bio);
951 } 961 }
952 962
953 /* we need to plug ALWAYS since we possibly need to kick lo_dev. 963 /* we need to plug ALWAYS since we possibly need to kick lo_dev.
@@ -1022,20 +1032,6 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
1022 return 0; 1032 return 0;
1023 } 1033 }
1024 1034
1025 /* Reject barrier requests if we know the underlying device does
1026 * not support them.
1027 * XXX: Need to get this info from peer as well some how so we
1028 * XXX: reject if EITHER side/data/metadata area does not support them.
1029 *
1030 * because of those XXX, this is not yet enabled,
1031 * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit.
1032 */
1033 if (unlikely(bio->bi_rw & REQ_HARDBARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags)) {
1034 /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */
1035 bio_endio(bio, -EOPNOTSUPP);
1036 return 0;
1037 }
1038
1039 /* 1035 /*
1040 * what we "blindly" assume: 1036 * what we "blindly" assume:
1041 */ 1037 */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 108d58015cd1..b0551ba7ad0c 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -102,12 +102,6 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
102 put_ldev(mdev); 102 put_ldev(mdev);
103} 103}
104 104
105static int is_failed_barrier(int ee_flags)
106{
107 return (ee_flags & (EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED))
108 == (EE_IS_BARRIER|EE_WAS_ERROR);
109}
110
111/* writes on behalf of the partner, or resync writes, 105/* writes on behalf of the partner, or resync writes,
112 * "submitted" by the receiver, final stage. */ 106 * "submitted" by the receiver, final stage. */
113static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local) 107static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local)
@@ -119,21 +113,6 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
119 int is_syncer_req; 113 int is_syncer_req;
120 int do_al_complete_io; 114 int do_al_complete_io;
121 115
122 /* if this is a failed barrier request, disable use of barriers,
123 * and schedule for resubmission */
124 if (is_failed_barrier(e->flags)) {
125 drbd_bump_write_ordering(mdev, WO_bdev_flush);
126 spin_lock_irqsave(&mdev->req_lock, flags);
127 list_del(&e->w.list);
128 e->flags = (e->flags & ~EE_WAS_ERROR) | EE_RESUBMITTED;
129 e->w.cb = w_e_reissue;
130 /* put_ldev actually happens below, once we come here again. */
131 __release(local);
132 spin_unlock_irqrestore(&mdev->req_lock, flags);
133 drbd_queue_work(&mdev->data.work, &e->w);
134 return;
135 }
136
137 D_ASSERT(e->block_id != ID_VACANT); 116 D_ASSERT(e->block_id != ID_VACANT);
138 117
139 /* after we moved e to done_ee, 118 /* after we moved e to done_ee,
@@ -925,7 +904,7 @@ out:
925 drbd_md_sync(mdev); 904 drbd_md_sync(mdev);
926 905
927 if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { 906 if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
928 dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n"); 907 dev_info(DEV, "Writing the whole bitmap\n");
929 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); 908 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
930 } 909 }
931 910
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1e5284ef65fa..7ea0bea2f7e3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -481,12 +481,6 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
481 if (bio_rw(bio) == WRITE) { 481 if (bio_rw(bio) == WRITE) {
482 struct file *file = lo->lo_backing_file; 482 struct file *file = lo->lo_backing_file;
483 483
484 /* REQ_HARDBARRIER is deprecated */
485 if (bio->bi_rw & REQ_HARDBARRIER) {
486 ret = -EOPNOTSUPP;
487 goto out;
488 }
489
490 if (bio->bi_rw & REQ_FLUSH) { 484 if (bio->bi_rw & REQ_FLUSH) {
491 ret = vfs_fsync(file, 0); 485 ret = vfs_fsync(file, 0);
492 if (unlikely(ret && ret != -EINVAL)) { 486 if (unlikely(ret && ret != -EINVAL)) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 06e2812ba124..255035cfc88a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -289,8 +289,6 @@ static int blkif_queue_request(struct request *req)
289 289
290 ring_req->operation = rq_data_dir(req) ? 290 ring_req->operation = rq_data_dir(req) ?
291 BLKIF_OP_WRITE : BLKIF_OP_READ; 291 BLKIF_OP_WRITE : BLKIF_OP_READ;
292 if (req->cmd_flags & REQ_HARDBARRIER)
293 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
294 292
295 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 293 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
296 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 294 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4e957f3140a8..324a3663fcda 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -706,7 +706,7 @@ static struct mdk_personality *find_pers(int level, char *clevel)
706/* return the offset of the super block in 512byte sectors */ 706/* return the offset of the super block in 512byte sectors */
707static inline sector_t calc_dev_sboffset(struct block_device *bdev) 707static inline sector_t calc_dev_sboffset(struct block_device *bdev)
708{ 708{
709 sector_t num_sectors = bdev->bd_inode->i_size / 512; 709 sector_t num_sectors = i_size_read(bdev->bd_inode) / 512;
710 return MD_NEW_SIZE_SECTORS(num_sectors); 710 return MD_NEW_SIZE_SECTORS(num_sectors);
711} 711}
712 712
@@ -1386,7 +1386,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1386 */ 1386 */
1387 switch(minor_version) { 1387 switch(minor_version) {
1388 case 0: 1388 case 0:
1389 sb_start = rdev->bdev->bd_inode->i_size >> 9; 1389 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1390 sb_start -= 8*2; 1390 sb_start -= 8*2;
1391 sb_start &= ~(sector_t)(4*2-1); 1391 sb_start &= ~(sector_t)(4*2-1);
1392 break; 1392 break;
@@ -1472,7 +1472,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1472 ret = 0; 1472 ret = 0;
1473 } 1473 }
1474 if (minor_version) 1474 if (minor_version)
1475 rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) - 1475 rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
1476 le64_to_cpu(sb->data_offset); 1476 le64_to_cpu(sb->data_offset);
1477 else 1477 else
1478 rdev->sectors = rdev->sb_start; 1478 rdev->sectors = rdev->sb_start;
@@ -1680,7 +1680,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1680 return 0; /* component must fit device */ 1680 return 0; /* component must fit device */
1681 if (rdev->sb_start < rdev->data_offset) { 1681 if (rdev->sb_start < rdev->data_offset) {
1682 /* minor versions 1 and 2; superblock before data */ 1682 /* minor versions 1 and 2; superblock before data */
1683 max_sectors = rdev->bdev->bd_inode->i_size >> 9; 1683 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1684 max_sectors -= rdev->data_offset; 1684 max_sectors -= rdev->data_offset;
1685 if (!num_sectors || num_sectors > max_sectors) 1685 if (!num_sectors || num_sectors > max_sectors)
1686 num_sectors = max_sectors; 1686 num_sectors = max_sectors;
@@ -1690,7 +1690,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1690 } else { 1690 } else {
1691 /* minor version 0; superblock after data */ 1691 /* minor version 0; superblock after data */
1692 sector_t sb_start; 1692 sector_t sb_start;
1693 sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2; 1693 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1694 sb_start &= ~(sector_t)(4*2 - 1); 1694 sb_start &= ~(sector_t)(4*2 - 1);
1695 max_sectors = rdev->sectors + sb_start - rdev->sb_start; 1695 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1696 if (!num_sectors || num_sectors > max_sectors) 1696 if (!num_sectors || num_sectors > max_sectors)
@@ -2584,7 +2584,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2584 if (!sectors) 2584 if (!sectors)
2585 return -EBUSY; 2585 return -EBUSY;
2586 } else if (!sectors) 2586 } else if (!sectors)
2587 sectors = (rdev->bdev->bd_inode->i_size >> 9) - 2587 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2588 rdev->data_offset; 2588 rdev->data_offset;
2589 } 2589 }
2590 if (sectors < my_mddev->dev_sectors) 2590 if (sectors < my_mddev->dev_sectors)
@@ -2797,7 +2797,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2797 2797
2798 kobject_init(&rdev->kobj, &rdev_ktype); 2798 kobject_init(&rdev->kobj, &rdev_ktype);
2799 2799
2800 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; 2800 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
2801 if (!size) { 2801 if (!size) {
2802 printk(KERN_WARNING 2802 printk(KERN_WARNING
2803 "md: %s has zero or unknown size, marking faulty!\n", 2803 "md: %s has zero or unknown size, marking faulty!\n",
@@ -5235,8 +5235,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5235 5235
5236 if (!mddev->persistent) { 5236 if (!mddev->persistent) {
5237 printk(KERN_INFO "md: nonpersistent superblock ...\n"); 5237 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5238 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; 5238 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5239 } else 5239 } else
5240 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 5240 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
5241 rdev->sectors = rdev->sb_start; 5241 rdev->sectors = rdev->sb_start;
5242 5242
@@ -5306,7 +5306,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
5306 if (mddev->persistent) 5306 if (mddev->persistent)
5307 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 5307 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
5308 else 5308 else
5309 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; 5309 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5310 5310
5311 rdev->sectors = rdev->sb_start; 5311 rdev->sectors = rdev->sb_start;
5312 5312
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 1de30eb83bb0..f3cf924a2cd9 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -320,19 +320,11 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
320 "changed. The Linux SCSI layer does not " 320 "changed. The Linux SCSI layer does not "
321 "automatically adjust these parameters.\n"); 321 "automatically adjust these parameters.\n");
322 322
323 if (scmd->request->cmd_flags & REQ_HARDBARRIER) 323 /*
324 /* 324 * Pass the UA upwards for a determination in the completion
325 * barrier requests should always retry on UA 325 * functions.
326 * otherwise block will get a spurious error 326 */
327 */ 327 return SUCCESS;
328 return NEEDS_RETRY;
329 else
330 /*
331 * for normal (non barrier) commands, pass the
332 * UA upwards for a determination in the
333 * completion functions
334 */
335 return SUCCESS;
336 328
337 /* these three are not supported */ 329 /* these three are not supported */
338 case COPY_ABORTED: 330 case COPY_ABORTED:
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 2054b1e25a65..d1268191acbd 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -331,10 +331,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
331 331
332 iu->iu_id = IU_ID_COMMAND; 332 iu->iu_id = IU_ID_COMMAND;
333 iu->tag = cpu_to_be16(stream_id); 333 iu->tag = cpu_to_be16(stream_id);
334 if (sdev->ordered_tags && (cmnd->request->cmd_flags & REQ_HARDBARRIER)) 334 iu->prio_attr = UAS_SIMPLE_TAG;
335 iu->prio_attr = UAS_ORDERED_TAG;
336 else
337 iu->prio_attr = UAS_SIMPLE_TAG;
338 iu->len = len; 335 iu->len = len;
339 int_to_scsilun(sdev->lun, &iu->lun); 336 int_to_scsilun(sdev->lun, &iu->lun);
340 memcpy(iu->cdb, cmnd->cmnd, cmnd->cmd_len); 337 memcpy(iu->cdb, cmnd->cmnd, cmnd->cmd_len);
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7c..4bd454fa844e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
370{ 370{
371 struct bio *bio; 371 struct bio *bio;
372 372
373 if (nr_iovecs > UIO_MAXIOV)
374 return NULL;
375
373 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), 376 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
374 gfp_mask); 377 gfp_mask);
375 if (unlikely(!bio)) 378 if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
697static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, 700static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
698 gfp_t gfp_mask) 701 gfp_t gfp_mask)
699{ 702{
700 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); 703 struct bio_map_data *bmd;
701 704
705 if (iov_count > UIO_MAXIOV)
706 return NULL;
707
708 bmd = kmalloc(sizeof(*bmd), gfp_mask);
702 if (!bmd) 709 if (!bmd)
703 return NULL; 710 return NULL;
704 711
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
827 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 834 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
828 start = uaddr >> PAGE_SHIFT; 835 start = uaddr >> PAGE_SHIFT;
829 836
837 /*
838 * Overflow, abort
839 */
840 if (end < start)
841 return ERR_PTR(-EINVAL);
842
830 nr_pages += end - start; 843 nr_pages += end - start;
831 len += iov[i].iov_len; 844 len += iov[i].iov_len;
832 } 845 }
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
955 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 968 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
956 unsigned long start = uaddr >> PAGE_SHIFT; 969 unsigned long start = uaddr >> PAGE_SHIFT;
957 970
971 /*
972 * Overflow, abort
973 */
974 if (end < start)
975 return ERR_PTR(-EINVAL);
976
958 nr_pages += end - start; 977 nr_pages += end - start;
959 /* 978 /*
960 * buffer must be aligned to at least hardsector size for now 979 * buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
982 unsigned long start = uaddr >> PAGE_SHIFT; 1001 unsigned long start = uaddr >> PAGE_SHIFT;
983 const int local_nr_pages = end - start; 1002 const int local_nr_pages = end - start;
984 const int page_limit = cur_page + local_nr_pages; 1003 const int page_limit = cur_page + local_nr_pages;
985 1004
986 ret = get_user_pages_fast(uaddr, local_nr_pages, 1005 ret = get_user_pages_fast(uaddr, local_nr_pages,
987 write_to_vm, &pages[cur_page]); 1006 write_to_vm, &pages[cur_page]);
988 if (ret < local_nr_pages) { 1007 if (ret < local_nr_pages) {
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc6..2f7d05c89922 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -111,12 +111,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
111 read_lock(&tasklist_lock); 111 read_lock(&tasklist_lock);
112 switch (which) { 112 switch (which) {
113 case IOPRIO_WHO_PROCESS: 113 case IOPRIO_WHO_PROCESS:
114 rcu_read_lock();
114 if (!who) 115 if (!who)
115 p = current; 116 p = current;
116 else 117 else
117 p = find_task_by_vpid(who); 118 p = find_task_by_vpid(who);
118 if (p) 119 if (p)
119 ret = set_task_ioprio(p, ioprio); 120 ret = set_task_ioprio(p, ioprio);
121 rcu_read_unlock();
120 break; 122 break;
121 case IOPRIO_WHO_PGRP: 123 case IOPRIO_WHO_PGRP:
122 if (!who) 124 if (!who)
@@ -139,7 +141,12 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
139 break; 141 break;
140 142
141 do_each_thread(g, p) { 143 do_each_thread(g, p) {
142 if (__task_cred(p)->uid != who) 144 int match;
145
146 rcu_read_lock();
147 match = __task_cred(p)->uid == who;
148 rcu_read_unlock();
149 if (!match)
143 continue; 150 continue;
144 ret = set_task_ioprio(p, ioprio); 151 ret = set_task_ioprio(p, ioprio);
145 if (ret) 152 if (ret)
@@ -200,12 +207,14 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
200 read_lock(&tasklist_lock); 207 read_lock(&tasklist_lock);
201 switch (which) { 208 switch (which) {
202 case IOPRIO_WHO_PROCESS: 209 case IOPRIO_WHO_PROCESS:
210 rcu_read_lock();
203 if (!who) 211 if (!who)
204 p = current; 212 p = current;
205 else 213 else
206 p = find_task_by_vpid(who); 214 p = find_task_by_vpid(who);
207 if (p) 215 if (p)
208 ret = get_task_ioprio(p); 216 ret = get_task_ioprio(p);
217 rcu_read_unlock();
209 break; 218 break;
210 case IOPRIO_WHO_PGRP: 219 case IOPRIO_WHO_PGRP:
211 if (!who) 220 if (!who)
@@ -232,7 +241,12 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
232 break; 241 break;
233 242
234 do_each_thread(g, p) { 243 do_each_thread(g, p) {
235 if (__task_cred(p)->uid != user->uid) 244 int match;
245
246 rcu_read_lock();
247 match = __task_cred(p)->uid == user->uid;
248 rcu_read_unlock();
249 if (!match)
236 continue; 250 continue;
237 tmpio = get_task_ioprio(p); 251 tmpio = get_task_ioprio(p);
238 if (tmpio < 0) 252 if (tmpio < 0)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ba679992d39b..35dcdb3589bc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -66,10 +66,6 @@
66#define bio_offset(bio) bio_iovec((bio))->bv_offset 66#define bio_offset(bio) bio_iovec((bio))->bv_offset
67#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) 67#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
68#define bio_sectors(bio) ((bio)->bi_size >> 9) 68#define bio_sectors(bio) ((bio)->bi_size >> 9)
69#define bio_empty_barrier(bio) \
70 ((bio->bi_rw & REQ_HARDBARRIER) && \
71 !bio_has_data(bio) && \
72 !(bio->bi_rw & REQ_DISCARD))
73 69
74static inline unsigned int bio_cur_bytes(struct bio *bio) 70static inline unsigned int bio_cur_bytes(struct bio *bio)
75{ 71{
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0437ab6bb54c..46ad5197537a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -122,7 +122,6 @@ enum rq_flag_bits {
122 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ 122 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
123 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ 123 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
124 124
125 __REQ_HARDBARRIER, /* may not be passed by drive either */
126 __REQ_SYNC, /* request is sync (sync write or read) */ 125 __REQ_SYNC, /* request is sync (sync write or read) */
127 __REQ_META, /* metadata io request */ 126 __REQ_META, /* metadata io request */
128 __REQ_DISCARD, /* request to discard sectors */ 127 __REQ_DISCARD, /* request to discard sectors */
@@ -159,7 +158,6 @@ enum rq_flag_bits {
159#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) 158#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV)
160#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) 159#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
161#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) 160#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER)
162#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
163#define REQ_SYNC (1 << __REQ_SYNC) 161#define REQ_SYNC (1 << __REQ_SYNC)
164#define REQ_META (1 << __REQ_META) 162#define REQ_META (1 << __REQ_META)
165#define REQ_DISCARD (1 << __REQ_DISCARD) 163#define REQ_DISCARD (1 << __REQ_DISCARD)
@@ -168,8 +166,8 @@ enum rq_flag_bits {
168#define REQ_FAILFAST_MASK \ 166#define REQ_FAILFAST_MASK \
169 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) 167 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
170#define REQ_COMMON_MASK \ 168#define REQ_COMMON_MASK \
171 (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ 169 (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \
172 REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) 170 REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
173#define REQ_CLONE_MASK REQ_COMMON_MASK 171#define REQ_CLONE_MASK REQ_COMMON_MASK
174 172
175#define REQ_UNPLUG (1 << __REQ_UNPLUG) 173#define REQ_UNPLUG (1 << __REQ_UNPLUG)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5027a599077d..aae86fd10c4f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -552,8 +552,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync)
552 * it already be started by driver. 552 * it already be started by driver.
553 */ 553 */
554#define RQ_NOMERGE_FLAGS \ 554#define RQ_NOMERGE_FLAGS \
555 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ 555 (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
556 REQ_FLUSH | REQ_FUA)
557#define rq_mergeable(rq) \ 556#define rq_mergeable(rq) \
558 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 557 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
559 (((rq)->cmd_flags & REQ_DISCARD) || \ 558 (((rq)->cmd_flags & REQ_DISCARD) || \
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 9b2a0158f399..ef44c7a0638c 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
53 53
54 54
55extern const char *drbd_buildtag(void); 55extern const char *drbd_buildtag(void);
56#define REL_VERSION "8.3.9rc2" 56#define REL_VERSION "8.3.9"
57#define API_VERSION 88 57#define API_VERSION 88
58#define PRO_VERSION_MIN 86 58#define PRO_VERSION_MIN 86
59#define PRO_VERSION_MAX 95 59#define PRO_VERSION_MAX 95
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 3e70b21884a9..b2eee896dcbc 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -76,7 +76,6 @@ int put_io_context(struct io_context *ioc);
76void exit_io_context(struct task_struct *task); 76void exit_io_context(struct task_struct *task);
77struct io_context *get_io_context(gfp_t gfp_flags, int node); 77struct io_context *get_io_context(gfp_t gfp_flags, int node);
78struct io_context *alloc_io_context(gfp_t gfp_flags, int node); 78struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
79void copy_io_context(struct io_context **pdst, struct io_context **psrc);
80#else 79#else
81static inline void exit_io_context(struct task_struct *task) 80static inline void exit_io_context(struct task_struct *task)
82{ 81{
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bc251ed66724..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
169 BLK_TC_ACT(BLK_TC_WRITE) }; 169 BLK_TC_ACT(BLK_TC_WRITE) };
170 170
171#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
172#define BLK_TC_RAHEAD BLK_TC_AHEAD 171#define BLK_TC_RAHEAD BLK_TC_AHEAD
173 172
174/* The ilog2() calls fall out because they're constant */ 173/* The ilog2() calls fall out because they're constant */
@@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
196 return; 195 return;
197 196
198 what |= ddir_act[rw & WRITE]; 197 what |= ddir_act[rw & WRITE];
199 what |= MASK_TC_BIT(rw, HARDBARRIER);
200 what |= MASK_TC_BIT(rw, SYNC); 198 what |= MASK_TC_BIT(rw, SYNC);
201 what |= MASK_TC_BIT(rw, RAHEAD); 199 what |= MASK_TC_BIT(rw, RAHEAD);
202 what |= MASK_TC_BIT(rw, META); 200 what |= MASK_TC_BIT(rw, META);
@@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1807 1805
1808 if (rw & REQ_RAHEAD) 1806 if (rw & REQ_RAHEAD)
1809 rwbs[i++] = 'A'; 1807 rwbs[i++] = 'A';
1810 if (rw & REQ_HARDBARRIER)
1811 rwbs[i++] = 'B';
1812 if (rw & REQ_SYNC) 1808 if (rw & REQ_SYNC)
1813 rwbs[i++] = 'S'; 1809 rwbs[i++] = 'S';
1814 if (rw & REQ_META) 1810 if (rw & REQ_META)