about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-01-02 21:49:58 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-01-02 21:49:58 -0500
commit: 77d0b194b2df04a1992f882d96ff4e2bd8bb8fe0 (patch)
tree: 2bc1800faf8bf6712e693b5a8ea9c1fdecbe1dc7
parent: b79f9f93eb483f2757b089bb4e1eb3827a609080 (diff)
parent: 427c5ce4417cba0801fbf79c8525d1330704759c (diff)
Merge tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:

 - Dead code removal for loop/sunvdc (Chengguang)

 - Mark BIDI support for bsg as deprecated, logging a single dmesg
   warning if anyone is actually using it (Christoph)

 - blkcg cleanup, killing a dead function and making the tryget_closest
   variant easier to read (Dennis)

 - Floppy fixes, one fixing a regression in swim3 (Finn)

 - lightnvm use-after-free fix (Gustavo)

 - gdrom leak fix (Wenwen)

 - a set of drbd updates (Lars, Luc, Nathan, Roland)

* tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block: (28 commits)
  block/swim3: Fix regression on PowerBook G3
  block/swim3: Fix -EBUSY error when re-opening device after unmount
  block/swim3: Remove dead return statement
  block/amiflop: Don't log error message on invalid ioctl
  gdrom: fix a memory leak bug
  lightnvm: pblk: fix use-after-free bug
  block: sunvdc: remove redundant code
  block: loop: remove redundant code
  bsg: deprecate BIDI support in bsg
  blkcg: remove unused __blkg_release_rcu()
  blkcg: clean up blkg_tryget_closest()
  drbd: Change drbd_request_detach_interruptible's return type to int
  drbd: Avoid Clang warning about pointless switch statment
  drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
  drbd: skip spurious timeout (ping-timeo) when failing promote
  drbd: don't retry connection if peers do not agree on "authentication" settings
  drbd: fix print_st_err()'s prototype to match the definition
  drbd: avoid spurious self-outdating with concurrent disconnect / down
  drbd: do not block when adjusting "disk-options" while IO is frozen
  drbd: fix comment typos
  ...
-rw-r--r--  block/bio.c                           4
-rw-r--r--  block/blk-cgroup.c                   23
-rw-r--r--  block/bsg.c                           4
-rw-r--r--  drivers/block/amiflop.c               2
-rw-r--r--  drivers/block/drbd/drbd_debugfs.c     2
-rw-r--r--  drivers/block/drbd/drbd_int.h        19
-rw-r--r--  drivers/block/drbd/drbd_main.c       28
-rw-r--r--  drivers/block/drbd/drbd_nl.c        133
-rw-r--r--  drivers/block/drbd/drbd_protocol.h   47
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  251
-rw-r--r--  drivers/block/drbd/drbd_req.c        19
-rw-r--r--  drivers/block/drbd/drbd_req.h         2
-rw-r--r--  drivers/block/drbd/drbd_state.c      11
-rw-r--r--  drivers/block/drbd/drbd_state.h       5
-rw-r--r--  drivers/block/drbd/drbd_worker.c      2
-rw-r--r--  drivers/block/loop.c                  1
-rw-r--r--  drivers/block/sunvdc.c                1
-rw-r--r--  drivers/block/swim3.c                14
-rw-r--r--  drivers/cdrom/gdrom.c                 1
-rw-r--r--  drivers/lightnvm/pblk-recovery.c      1
-rw-r--r--  include/linux/blk-cgroup.h           21
-rw-r--r--  include/linux/drbd.h                  2
-rw-r--r--  include/linux/genl_magic_struct.h     5
23 files changed, 467 insertions, 131 deletions
diff --git a/block/bio.c b/block/bio.c
index 8281bfcbc265..4db1008309ed 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2097,8 +2097,12 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
2097 */ 2097 */
2098void bio_clone_blkg_association(struct bio *dst, struct bio *src) 2098void bio_clone_blkg_association(struct bio *dst, struct bio *src)
2099{ 2099{
2100 rcu_read_lock();
2101
2100 if (src->bi_blkg) 2102 if (src->bi_blkg)
2101 __bio_associate_blkg(dst, src->bi_blkg); 2103 __bio_associate_blkg(dst, src->bi_blkg);
2104
2105 rcu_read_unlock();
2102} 2106}
2103EXPORT_SYMBOL_GPL(bio_clone_blkg_association); 2107EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
2104#endif /* CONFIG_BLK_CGROUP */ 2108#endif /* CONFIG_BLK_CGROUP */
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c8cc1cbb6370..2bed5725aa03 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -438,29 +438,6 @@ static void blkg_destroy_all(struct request_queue *q)
438 spin_unlock_irq(&q->queue_lock); 438 spin_unlock_irq(&q->queue_lock);
439} 439}
440 440
441/*
442 * A group is RCU protected, but having an rcu lock does not mean that one
443 * can access all the fields of blkg and assume these are valid. For
444 * example, don't try to follow throtl_data and request queue links.
445 *
446 * Having a reference to blkg under an rcu allows accesses to only values
447 * local to groups like group stats and group rate limits.
448 */
449void __blkg_release_rcu(struct rcu_head *rcu_head)
450{
451 struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
452
453 /* release the blkcg and parent blkg refs this blkg has been holding */
454 css_put(&blkg->blkcg->css);
455 if (blkg->parent)
456 blkg_put(blkg->parent);
457
458 wb_congested_put(blkg->wb_congested);
459
460 blkg_free(blkg);
461}
462EXPORT_SYMBOL_GPL(__blkg_release_rcu);
463
464static int blkcg_reset_stats(struct cgroup_subsys_state *css, 441static int blkcg_reset_stats(struct cgroup_subsys_state *css,
465 struct cftype *cftype, u64 val) 442 struct cftype *cftype, u64 val)
466{ 443{
diff --git a/block/bsg.c b/block/bsg.c
index 44f6028b9567..50e5f8f666f2 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -177,6 +177,10 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
177 goto out; 177 goto out;
178 } 178 }
179 179
180 pr_warn_once(
181 "BIDI support in bsg has been deprecated and might be removed. "
182 "Please report your use case to linux-scsi@vger.kernel.org\n");
183
180 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); 184 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
181 if (IS_ERR(next_rq)) { 185 if (IS_ERR(next_rq)) {
182 ret = PTR_ERR(next_rq); 186 ret = PTR_ERR(next_rq);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index bf996bd44cfc..0903e0803ec8 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1601,8 +1601,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
1601 return p->type->read_size; 1601 return p->type->read_size;
1602#endif 1602#endif
1603 default: 1603 default:
1604 printk(KERN_DEBUG "fd_ioctl: unknown cmd %d for drive %d.",
1605 cmd, drive);
1606 return -ENOSYS; 1604 return -ENOSYS;
1607 } 1605 }
1608 return 0; 1606 return 0;
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 5d5e8d6a8a56..f13b48ff5f43 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -237,6 +237,8 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
237 seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); 237 seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
238 seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); 238 seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
239 seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); 239 seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
240 seq_print_rq_state_bit(m, f & EE_TRIM, &sep, "trim");
241 seq_print_rq_state_bit(m, f & EE_ZEROOUT, &sep, "zero-out");
240 seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same"); 242 seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
241 seq_putc(m, '\n'); 243 seq_putc(m, '\n');
242} 244}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1e47db57b9d2..000a2f4c0e92 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -430,7 +430,11 @@ enum {
430 __EE_MAY_SET_IN_SYNC, 430 __EE_MAY_SET_IN_SYNC,
431 431
432 /* is this a TRIM aka REQ_OP_DISCARD? */ 432 /* is this a TRIM aka REQ_OP_DISCARD? */
433 __EE_IS_TRIM, 433 __EE_TRIM,
434 /* explicit zero-out requested, or
435 * our lower level cannot handle trim,
436 * and we want to fall back to zeroout instead */
437 __EE_ZEROOUT,
434 438
435 /* In case a barrier failed, 439 /* In case a barrier failed,
436 * we need to resubmit without the barrier flag. */ 440 * we need to resubmit without the barrier flag. */
@@ -472,7 +476,8 @@ enum {
472}; 476};
473#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) 477#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
474#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) 478#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
475#define EE_IS_TRIM (1<<__EE_IS_TRIM) 479#define EE_TRIM (1<<__EE_TRIM)
480#define EE_ZEROOUT (1<<__EE_ZEROOUT)
476#define EE_RESUBMITTED (1<<__EE_RESUBMITTED) 481#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
477#define EE_WAS_ERROR (1<<__EE_WAS_ERROR) 482#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
478#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) 483#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
@@ -1556,6 +1561,8 @@ extern void start_resync_timer_fn(struct timer_list *t);
1556extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); 1561extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
1557 1562
1558/* drbd_receiver.c */ 1563/* drbd_receiver.c */
1564extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
1565 sector_t start, unsigned int nr_sectors, int flags);
1559extern int drbd_receiver(struct drbd_thread *thi); 1566extern int drbd_receiver(struct drbd_thread *thi);
1560extern int drbd_ack_receiver(struct drbd_thread *thi); 1567extern int drbd_ack_receiver(struct drbd_thread *thi);
1561extern void drbd_send_ping_wf(struct work_struct *ws); 1568extern void drbd_send_ping_wf(struct work_struct *ws);
@@ -1609,13 +1616,7 @@ static inline void drbd_tcp_quickack(struct socket *sock)
1609} 1616}
1610 1617
1611/* sets the number of 512 byte sectors of our virtual device */ 1618/* sets the number of 512 byte sectors of our virtual device */
1612static inline void drbd_set_my_capacity(struct drbd_device *device, 1619void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
1613 sector_t size)
1614{
1615 /* set_capacity(device->this_bdev->bd_disk, size); */
1616 set_capacity(device->vdisk, size);
1617 device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
1618}
1619 1620
1620/* 1621/*
1621 * used to submit our private bio 1622 * used to submit our private bio
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index f973a2a845c8..714eb64fabfd 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1668,7 +1668,11 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
1668 (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | 1668 (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
1669 (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | 1669 (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
1670 (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) | 1670 (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
1671 (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0); 1671 (bio_op(bio) == REQ_OP_WRITE_ZEROES ?
1672 ((connection->agreed_features & DRBD_FF_WZEROES) ?
1673 (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0))
1674 : DP_DISCARD)
1675 : 0);
1672 else 1676 else
1673 return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; 1677 return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
1674} 1678}
@@ -1712,10 +1716,11 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
1712 } 1716 }
1713 p->dp_flags = cpu_to_be32(dp_flags); 1717 p->dp_flags = cpu_to_be32(dp_flags);
1714 1718
1715 if (dp_flags & DP_DISCARD) { 1719 if (dp_flags & (DP_DISCARD|DP_ZEROES)) {
1720 enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM;
1716 struct p_trim *t = (struct p_trim*)p; 1721 struct p_trim *t = (struct p_trim*)p;
1717 t->size = cpu_to_be32(req->i.size); 1722 t->size = cpu_to_be32(req->i.size);
1718 err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0); 1723 err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
1719 goto out; 1724 goto out;
1720 } 1725 }
1721 if (dp_flags & DP_WSAME) { 1726 if (dp_flags & DP_WSAME) {
@@ -2034,6 +2039,21 @@ void drbd_init_set_defaults(struct drbd_device *device)
2034 device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; 2039 device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
2035} 2040}
2036 2041
2042static void _drbd_set_my_capacity(struct drbd_device *device, sector_t size)
2043{
2044 /* set_capacity(device->this_bdev->bd_disk, size); */
2045 set_capacity(device->vdisk, size);
2046 device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
2047}
2048
2049void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
2050{
2051 char ppb[10];
2052 _drbd_set_my_capacity(device, size);
2053 drbd_info(device, "size = %s (%llu KB)\n",
2054 ppsize(ppb, size>>1), (unsigned long long)size>>1);
2055}
2056
2037void drbd_device_cleanup(struct drbd_device *device) 2057void drbd_device_cleanup(struct drbd_device *device)
2038{ 2058{
2039 int i; 2059 int i;
@@ -2059,7 +2079,7 @@ void drbd_device_cleanup(struct drbd_device *device)
2059 } 2079 }
2060 D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); 2080 D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
2061 2081
2062 drbd_set_my_capacity(device, 0); 2082 _drbd_set_my_capacity(device, 0);
2063 if (device->bitmap) { 2083 if (device->bitmap) {
2064 /* maybe never allocated. */ 2084 /* maybe never allocated. */
2065 drbd_bm_resize(device, 0, 1); 2085 drbd_bm_resize(device, 0, 1);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d15703b1ffe8..f2471172a961 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -127,6 +127,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
127 return 0; 127 return 0;
128} 128}
129 129
130__printf(2, 3)
131static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
132{
133 va_list args;
134 struct nlattr *nla, *txt;
135 int err = -EMSGSIZE;
136 int len;
137
138 nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
139 if (!nla)
140 return err;
141
142 txt = nla_reserve(skb, T_info_text, 256);
143 if (!txt) {
144 nla_nest_cancel(skb, nla);
145 return err;
146 }
147 va_start(args, fmt);
148 len = vscnprintf(nla_data(txt), 256, fmt, args);
149 va_end(args);
150
151 /* maybe: retry with larger reserve, if truncated */
152 txt->nla_len = nla_attr_size(len+1);
153 nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
154 nla_nest_end(skb, nla);
155
156 return 0;
157}
158
130/* This would be a good candidate for a "pre_doit" hook, 159/* This would be a good candidate for a "pre_doit" hook,
131 * and per-family private info->pointers. 160 * and per-family private info->pointers.
132 * But we need to stay compatible with older kernels. 161 * But we need to stay compatible with older kernels.
@@ -668,14 +697,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for
668 if (rv == SS_TWO_PRIMARIES) { 697 if (rv == SS_TWO_PRIMARIES) {
669 /* Maybe the peer is detected as dead very soon... 698 /* Maybe the peer is detected as dead very soon...
670 retry at most once more in this case. */ 699 retry at most once more in this case. */
671 int timeo; 700 if (try < max_tries) {
672 rcu_read_lock(); 701 int timeo;
673 nc = rcu_dereference(connection->net_conf);
674 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
675 rcu_read_unlock();
676 schedule_timeout_interruptible(timeo);
677 if (try < max_tries)
678 try = max_tries - 1; 702 try = max_tries - 1;
703 rcu_read_lock();
704 nc = rcu_dereference(connection->net_conf);
705 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
706 rcu_read_unlock();
707 schedule_timeout_interruptible(timeo);
708 }
679 continue; 709 continue;
680 } 710 }
681 if (rv < SS_SUCCESS) { 711 if (rv < SS_SUCCESS) {
@@ -921,7 +951,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
921 } prev; 951 } prev;
922 sector_t u_size, size; 952 sector_t u_size, size;
923 struct drbd_md *md = &device->ldev->md; 953 struct drbd_md *md = &device->ldev->md;
924 char ppb[10];
925 void *buffer; 954 void *buffer;
926 955
927 int md_moved, la_size_changed; 956 int md_moved, la_size_changed;
@@ -999,8 +1028,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
999 /* racy, see comments above. */ 1028 /* racy, see comments above. */
1000 drbd_set_my_capacity(device, size); 1029 drbd_set_my_capacity(device, size);
1001 md->la_size_sect = size; 1030 md->la_size_sect = size;
1002 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
1003 (unsigned long long)size>>1);
1004 } 1031 }
1005 if (rv <= DS_ERROR) 1032 if (rv <= DS_ERROR)
1006 goto err_out; 1033 goto err_out;
@@ -1234,6 +1261,21 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
1234 } 1261 }
1235} 1262}
1236 1263
1264static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
1265{
1266 /* Fixup max_write_zeroes_sectors after blk_queue_stack_limits():
1267 * if we can handle "zeroes" efficiently on the protocol,
1268 * we want to do that, even if our backend does not announce
1269 * max_write_zeroes_sectors itself. */
1270 struct drbd_connection *connection = first_peer_device(device)->connection;
1271 /* If the peer announces WZEROES support, use it. Otherwise, rather
1272 * send explicit zeroes than rely on some discard-zeroes-data magic. */
1273 if (connection->agreed_features & DRBD_FF_WZEROES)
1274 q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
1275 else
1276 q->limits.max_write_zeroes_sectors = 0;
1277}
1278
1237static void decide_on_write_same_support(struct drbd_device *device, 1279static void decide_on_write_same_support(struct drbd_device *device,
1238 struct request_queue *q, 1280 struct request_queue *q,
1239 struct request_queue *b, struct o_qlim *o, 1281 struct request_queue *b, struct o_qlim *o,
@@ -1344,6 +1386,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
1344 } 1386 }
1345 } 1387 }
1346 fixup_discard_if_not_supported(q); 1388 fixup_discard_if_not_supported(q);
1389 fixup_write_zeroes(device, q);
1347} 1390}
1348 1391
1349void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) 1392void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
@@ -1514,6 +1557,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
1514 } 1557 }
1515} 1558}
1516 1559
1560static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1561{
1562 int err = -EBUSY;
1563
1564 if (device->act_log &&
1565 device->act_log->nr_elements == dc->al_extents)
1566 return 0;
1567
1568 drbd_suspend_io(device);
1569 /* If IO completion is currently blocked, we would likely wait
1570 * "forever" for the activity log to become unused. So we don't. */
1571 if (atomic_read(&device->ap_bio_cnt))
1572 goto out;
1573
1574 wait_event(device->al_wait, lc_try_lock(device->act_log));
1575 drbd_al_shrink(device);
1576 err = drbd_check_al_size(device, dc);
1577 lc_unlock(device->act_log);
1578 wake_up(&device->al_wait);
1579out:
1580 drbd_resume_io(device);
1581 return err;
1582}
1583
1517int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) 1584int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1518{ 1585{
1519 struct drbd_config_context adm_ctx; 1586 struct drbd_config_context adm_ctx;
@@ -1576,15 +1643,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1576 } 1643 }
1577 } 1644 }
1578 1645
1579 drbd_suspend_io(device); 1646 err = disk_opts_check_al_size(device, new_disk_conf);
1580 wait_event(device->al_wait, lc_try_lock(device->act_log));
1581 drbd_al_shrink(device);
1582 err = drbd_check_al_size(device, new_disk_conf);
1583 lc_unlock(device->act_log);
1584 wake_up(&device->al_wait);
1585 drbd_resume_io(device);
1586
1587 if (err) { 1647 if (err) {
1648 /* Could be just "busy". Ignore?
1649 * Introduce dedicated error code? */
1650 drbd_msg_put_info(adm_ctx.reply_skb,
1651 "Try again without changing current al-extents setting");
1588 retcode = ERR_NOMEM; 1652 retcode = ERR_NOMEM;
1589 goto fail_unlock; 1653 goto fail_unlock;
1590 } 1654 }
@@ -1934,9 +1998,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1934 } 1998 }
1935 } 1999 }
1936 2000
1937 if (device->state.conn < C_CONNECTED && 2001 if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
1938 device->state.role == R_PRIMARY && device->ed_uuid && 2002 (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
1939 (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { 2003 (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1940 drbd_err(device, "Can only attach to data with current UUID=%016llX\n", 2004 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1941 (unsigned long long)device->ed_uuid); 2005 (unsigned long long)device->ed_uuid);
1942 retcode = ERR_DATA_NOT_CURRENT; 2006 retcode = ERR_DATA_NOT_CURRENT;
@@ -1950,11 +2014,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1950 } 2014 }
1951 2015
1952 /* Prevent shrinking of consistent devices ! */ 2016 /* Prevent shrinking of consistent devices ! */
1953 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && 2017 {
1954 drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { 2018 unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
1955 drbd_warn(device, "refusing to truncate a consistent device\n"); 2019 unsigned long long eff = nbc->md.la_size_sect;
1956 retcode = ERR_DISK_TOO_SMALL; 2020 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
1957 goto force_diskless_dec; 2021 if (nsz == nbc->disk_conf->disk_size) {
2022 drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
2023 } else {
2024 drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
2025 drbd_msg_sprintf_info(adm_ctx.reply_skb,
2026 "To-be-attached device has last effective > current size, and is consistent\n"
2027 "(%llu > %llu sectors). Refusing to attach.", eff, nsz);
2028 retcode = ERR_IMPLICIT_SHRINK;
2029 goto force_diskless_dec;
2030 }
2031 }
1958 } 2032 }
1959 2033
1960 lock_all_resources(); 2034 lock_all_resources();
@@ -2654,8 +2728,10 @@ out:
2654 2728
2655static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) 2729static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2656{ 2730{
2731 enum drbd_conns cstate;
2657 enum drbd_state_rv rv; 2732 enum drbd_state_rv rv;
2658 2733
2734repeat:
2659 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), 2735 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2660 force ? CS_HARD : 0); 2736 force ? CS_HARD : 0);
2661 2737
@@ -2673,6 +2749,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
2673 2749
2674 break; 2750 break;
2675 case SS_CW_FAILED_BY_PEER: 2751 case SS_CW_FAILED_BY_PEER:
2752 spin_lock_irq(&connection->resource->req_lock);
2753 cstate = connection->cstate;
2754 spin_unlock_irq(&connection->resource->req_lock);
2755 if (cstate <= C_WF_CONNECTION)
2756 goto repeat;
2676 /* The peer probably wants to see us outdated. */ 2757 /* The peer probably wants to see us outdated. */
2677 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, 2758 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2678 disk, D_OUTDATED), 0); 2759 disk, D_OUTDATED), 0);
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index 48dabbb21e11..e6fc5ad72501 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -70,6 +70,11 @@ enum drbd_packet {
70 * we may fall back to an opencoded loop instead. */ 70 * we may fall back to an opencoded loop instead. */
71 P_WSAME = 0x34, 71 P_WSAME = 0x34,
72 72
73 /* 0x35 already claimed in DRBD 9 */
74 P_ZEROES = 0x36, /* data sock: zero-out, WRITE_ZEROES */
75
76 /* 0x40 .. 0x48 already claimed in DRBD 9 */
77
73 P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ 78 P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
74 P_MAX_OPT_CMD = 0x101, 79 P_MAX_OPT_CMD = 0x101,
75 80
@@ -130,6 +135,12 @@ struct p_header100 {
130#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ 135#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
131#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ 136#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
132#define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ 137#define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */
138#define DP_ZEROES 1024 /* equiv. REQ_OP_WRITE_ZEROES */
139
140/* possible combinations:
141 * REQ_OP_WRITE_ZEROES: DP_DISCARD | DP_ZEROES
142 * REQ_OP_WRITE_ZEROES + REQ_NOUNMAP: DP_ZEROES
143 */
133 144
134struct p_data { 145struct p_data {
135 u64 sector; /* 64 bits sector number */ 146 u64 sector; /* 64 bits sector number */
@@ -197,6 +208,42 @@ struct p_block_req {
197 */ 208 */
198#define DRBD_FF_WSAME 4 209#define DRBD_FF_WSAME 4
199 210
211/* supports REQ_OP_WRITE_ZEROES on the "wire" protocol.
212 *
213 * We used to map that to "discard" on the sending side, and if we cannot
214 * guarantee that discard zeroes data, the receiving side would map discard
215 * back to zero-out.
216 *
217 * With the introduction of REQ_OP_WRITE_ZEROES,
218 * we started to use that for both WRITE_ZEROES and DISCARDS,
219 * hoping that WRITE_ZEROES would "do what we want",
220 * UNMAP if possible, zero-out the rest.
221 *
222 * The example scenario is some LVM "thin" backend.
223 *
224 * While an un-allocated block on dm-thin reads as zeroes, on a dm-thin
225 * with "skip_block_zeroing=true", after a partial block write allocated
226 * that block, that same block may well map "undefined old garbage" from
227 * the backends on LBAs that have not yet been written to.
228 *
229 * If we cannot distinguish between zero-out and discard on the receiving
230 * side, to avoid "undefined old garbage" to pop up randomly at later times
231 * on supposedly zero-initialized blocks, we'd need to map all discards to
232 * zero-out on the receiving side. But that would potentially do a full
233 * alloc on thinly provisioned backends, even when the expectation was to
234 * unmap/trim/discard/de-allocate.
235 *
236 * We need to distinguish on the protocol level, whether we need to guarantee
237 * zeroes (and thus use zero-out, potentially doing the mentioned full-alloc),
238 * or if we want to put the emphasis on discard, and only do a "best effort
239 * zeroing" (by "discarding" blocks aligned to discard-granularity, and zeroing
240 * only potential unaligned head and tail clippings), to at least *try* to
241 * avoid "false positives" in an online-verify later, hoping that someone
242 * set skip_block_zeroing=false.
243 */
244#define DRBD_FF_WZEROES 8
245
246
200struct p_connection_features { 247struct p_connection_features {
201 u32 protocol_min; 248 u32 protocol_min;
202 u32 feature_flags; 249 u32 feature_flags;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ccfcf00f2798..c7ad88d91a09 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -50,7 +50,7 @@
50#include "drbd_req.h" 50#include "drbd_req.h"
51#include "drbd_vli.h" 51#include "drbd_vli.h"
52 52
53#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME) 53#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
54 54
55struct packet_info { 55struct packet_info {
56 enum drbd_packet cmd; 56 enum drbd_packet cmd;
@@ -1490,14 +1490,129 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
1490 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); 1490 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
1491} 1491}
1492 1492
1493static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) 1493/*
1494 * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
1495 * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1496 * will directly go to fallback mode, submitting normal writes, and
1497 * never even try to UNMAP.
1498 *
1499 * And dm-thin does not do this (yet), mostly because in general it has
1500 * to assume that "skip_block_zeroing" is set. See also:
1501 * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1502 * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1503 *
1504 * We *may* ignore the discard-zeroes-data setting, if so configured.
1505 *
1506 * Assumption is that this "discard_zeroes_data=0" is only because the backend
1507 * may ignore partial unaligned discards.
1508 *
1509 * LVM/DM thin as of at least
1510 * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
1511 * Library version: 1.02.93-RHEL7 (2015-01-28)
1512 * Driver version: 4.29.0
1513 * still behaves this way.
1514 *
1515 * For unaligned (wrt. alignment and granularity) or too small discards,
1516 * we zero-out the initial (and/or) trailing unaligned partial chunks,
1517 * but discard all the aligned full chunks.
1518 *
1519 * At least for LVM/DM thin, with skip_block_zeroing=false,
1520 * the result is effectively "discard_zeroes_data=1".
1521 */
1522/* flags: EE_TRIM|EE_ZEROOUT */
1523int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
1494{ 1524{
1495 struct block_device *bdev = device->ldev->backing_bdev; 1525 struct block_device *bdev = device->ldev->backing_bdev;
1526 struct request_queue *q = bdev_get_queue(bdev);
1527 sector_t tmp, nr;
1528 unsigned int max_discard_sectors, granularity;
1529 int alignment;
1530 int err = 0;
1496 1531
1497 if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9, 1532 if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
1498 GFP_NOIO, 0)) 1533 goto zero_out;
1499 peer_req->flags |= EE_WAS_ERROR; 1534
1535 /* Zero-sector (unknown) and one-sector granularities are the same. */
1536 granularity = max(q->limits.discard_granularity >> 9, 1U);
1537 alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
1538
1539 max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
1540 max_discard_sectors -= max_discard_sectors % granularity;
1541 if (unlikely(!max_discard_sectors))
1542 goto zero_out;
1543
1544 if (nr_sectors < granularity)
1545 goto zero_out;
1546
1547 tmp = start;
1548 if (sector_div(tmp, granularity) != alignment) {
1549 if (nr_sectors < 2*granularity)
1550 goto zero_out;
1551 /* start + gran - (start + gran - align) % gran */
1552 tmp = start + granularity - alignment;
1553 tmp = start + granularity - sector_div(tmp, granularity);
1554
1555 nr = tmp - start;
1556 /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
1557 * layers are below us, some may have smaller granularity */
1558 err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
1559 nr_sectors -= nr;
1560 start = tmp;
1561 }
1562 while (nr_sectors >= max_discard_sectors) {
1563 err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
1564 nr_sectors -= max_discard_sectors;
1565 start += max_discard_sectors;
1566 }
1567 if (nr_sectors) {
1568 /* max_discard_sectors is unsigned int (and a multiple of
1569 * granularity, we made sure of that above already);
1570 * nr is < max_discard_sectors;
1571 * I don't need sector_div here, even though nr is sector_t */
1572 nr = nr_sectors;
1573 nr -= (unsigned int)nr % granularity;
1574 if (nr) {
1575 err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
1576 nr_sectors -= nr;
1577 start += nr;
1578 }
1579 }
1580 zero_out:
1581 if (nr_sectors) {
1582 err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
1583 (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
1584 }
1585 return err != 0;
1586}
1500 1587
1588static bool can_do_reliable_discards(struct drbd_device *device)
1589{
1590 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
1591 struct disk_conf *dc;
1592 bool can_do;
1593
1594 if (!blk_queue_discard(q))
1595 return false;
1596
1597 rcu_read_lock();
1598 dc = rcu_dereference(device->ldev->disk_conf);
1599 can_do = dc->discard_zeroes_if_aligned;
1600 rcu_read_unlock();
1601 return can_do;
1602}
1603
1604static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
1605{
1606 /* If the backend cannot discard, or does not guarantee
1607 * read-back zeroes in discarded ranges, we fall back to
1608 * zero-out. Unless configuration specifically requested
1609 * otherwise. */
1610 if (!can_do_reliable_discards(device))
1611 peer_req->flags |= EE_ZEROOUT;
1612
1613 if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1614 peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1615 peer_req->flags |= EE_WAS_ERROR;
1501 drbd_endio_write_sec_final(peer_req); 1616 drbd_endio_write_sec_final(peer_req);
1502} 1617}
1503 1618
@@ -1550,7 +1665,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
1550 * Correctness first, performance later. Next step is to code an 1665 * Correctness first, performance later. Next step is to code an
1551 * asynchronous variant of the same. 1666 * asynchronous variant of the same.
1552 */ 1667 */
1553 if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) { 1668 if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
1554 /* wait for all pending IO completions, before we start 1669 /* wait for all pending IO completions, before we start
1555 * zeroing things out. */ 1670 * zeroing things out. */
1556 conn_wait_active_ee_empty(peer_req->peer_device->connection); 1671 conn_wait_active_ee_empty(peer_req->peer_device->connection);
@@ -1567,8 +1682,8 @@ int drbd_submit_peer_request(struct drbd_device *device,
1567 spin_unlock_irq(&device->resource->req_lock); 1682 spin_unlock_irq(&device->resource->req_lock);
1568 } 1683 }
1569 1684
1570 if (peer_req->flags & EE_IS_TRIM) 1685 if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
1571 drbd_issue_peer_discard(device, peer_req); 1686 drbd_issue_peer_discard_or_zero_out(device, peer_req);
1572 else /* EE_WRITE_SAME */ 1687 else /* EE_WRITE_SAME */
1573 drbd_issue_peer_wsame(device, peer_req); 1688 drbd_issue_peer_wsame(device, peer_req);
1574 return 0; 1689 return 0;
@@ -1765,6 +1880,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1765 void *dig_vv = peer_device->connection->int_dig_vv; 1880 void *dig_vv = peer_device->connection->int_dig_vv;
1766 unsigned long *data; 1881 unsigned long *data;
1767 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; 1882 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
1883 struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
1768 struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL; 1884 struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
1769 1885
1770 digest_size = 0; 1886 digest_size = 0;
@@ -1786,6 +1902,10 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1786 if (!expect(data_size == 0)) 1902 if (!expect(data_size == 0))
1787 return NULL; 1903 return NULL;
1788 ds = be32_to_cpu(trim->size); 1904 ds = be32_to_cpu(trim->size);
1905 } else if (zeroes) {
1906 if (!expect(data_size == 0))
1907 return NULL;
1908 ds = be32_to_cpu(zeroes->size);
1789 } else if (wsame) { 1909 } else if (wsame) {
1790 if (data_size != queue_logical_block_size(device->rq_queue)) { 1910 if (data_size != queue_logical_block_size(device->rq_queue)) {
1791 drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n", 1911 drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
@@ -1802,7 +1922,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1802 1922
1803 if (!expect(IS_ALIGNED(ds, 512))) 1923 if (!expect(IS_ALIGNED(ds, 512)))
1804 return NULL; 1924 return NULL;
1805 if (trim || wsame) { 1925 if (trim || wsame || zeroes) {
1806 if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9))) 1926 if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
1807 return NULL; 1927 return NULL;
1808 } else if (!expect(ds <= DRBD_MAX_BIO_SIZE)) 1928 } else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
@@ -1827,7 +1947,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1827 1947
1828 peer_req->flags |= EE_WRITE; 1948 peer_req->flags |= EE_WRITE;
1829 if (trim) { 1949 if (trim) {
1830 peer_req->flags |= EE_IS_TRIM; 1950 peer_req->flags |= EE_TRIM;
1951 return peer_req;
1952 }
1953 if (zeroes) {
1954 peer_req->flags |= EE_ZEROOUT;
1831 return peer_req; 1955 return peer_req;
1832 } 1956 }
1833 if (wsame) 1957 if (wsame)
@@ -2326,8 +2450,12 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf)
2326 2450
2327static unsigned long wire_flags_to_bio_op(u32 dpf) 2451static unsigned long wire_flags_to_bio_op(u32 dpf)
2328{ 2452{
2329 if (dpf & DP_DISCARD) 2453 if (dpf & DP_ZEROES)
2330 return REQ_OP_WRITE_ZEROES; 2454 return REQ_OP_WRITE_ZEROES;
2455 if (dpf & DP_DISCARD)
2456 return REQ_OP_DISCARD;
2457 if (dpf & DP_WSAME)
2458 return REQ_OP_WRITE_SAME;
2331 else 2459 else
2332 return REQ_OP_WRITE; 2460 return REQ_OP_WRITE;
2333} 2461}
@@ -2518,8 +2646,19 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2518 op_flags = wire_flags_to_bio_flags(dp_flags); 2646 op_flags = wire_flags_to_bio_flags(dp_flags);
2519 if (pi->cmd == P_TRIM) { 2647 if (pi->cmd == P_TRIM) {
2520 D_ASSERT(peer_device, peer_req->i.size > 0); 2648 D_ASSERT(peer_device, peer_req->i.size > 0);
2649 D_ASSERT(peer_device, op == REQ_OP_DISCARD);
2650 D_ASSERT(peer_device, peer_req->pages == NULL);
2651 /* need to play safe: an older DRBD sender
2652 * may mean zero-out while sending P_TRIM. */
2653 if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
2654 peer_req->flags |= EE_ZEROOUT;
2655 } else if (pi->cmd == P_ZEROES) {
2656 D_ASSERT(peer_device, peer_req->i.size > 0);
2521 D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); 2657 D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
2522 D_ASSERT(peer_device, peer_req->pages == NULL); 2658 D_ASSERT(peer_device, peer_req->pages == NULL);
2659 /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
2660 if (dp_flags & DP_DISCARD)
2661 peer_req->flags |= EE_TRIM;
2523 } else if (peer_req->pages == NULL) { 2662 } else if (peer_req->pages == NULL) {
2524 D_ASSERT(device, peer_req->i.size == 0); 2663 D_ASSERT(device, peer_req->i.size == 0);
2525 D_ASSERT(device, dp_flags & DP_FLUSH); 2664 D_ASSERT(device, dp_flags & DP_FLUSH);
@@ -2587,7 +2726,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2587 * we wait for all pending requests, respectively wait for 2726 * we wait for all pending requests, respectively wait for
2588 * active_ee to become empty in drbd_submit_peer_request(); 2727 * active_ee to become empty in drbd_submit_peer_request();
2589 * better not add ourselves here. */ 2728 * better not add ourselves here. */
2590 if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0) 2729 if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
2591 list_add_tail(&peer_req->w.list, &device->active_ee); 2730 list_add_tail(&peer_req->w.list, &device->active_ee);
2592 spin_unlock_irq(&device->resource->req_lock); 2731 spin_unlock_irq(&device->resource->req_lock);
2593 2732
@@ -3364,7 +3503,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3364 enum drbd_conns rv = C_MASK; 3503 enum drbd_conns rv = C_MASK;
3365 enum drbd_disk_state mydisk; 3504 enum drbd_disk_state mydisk;
3366 struct net_conf *nc; 3505 struct net_conf *nc;
3367 int hg, rule_nr, rr_conflict, tentative; 3506 int hg, rule_nr, rr_conflict, tentative, always_asbp;
3368 3507
3369 mydisk = device->state.disk; 3508 mydisk = device->state.disk;
3370 if (mydisk == D_NEGOTIATING) 3509 if (mydisk == D_NEGOTIATING)
@@ -3415,8 +3554,12 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3415 3554
3416 rcu_read_lock(); 3555 rcu_read_lock();
3417 nc = rcu_dereference(peer_device->connection->net_conf); 3556 nc = rcu_dereference(peer_device->connection->net_conf);
3557 always_asbp = nc->always_asbp;
3558 rr_conflict = nc->rr_conflict;
3559 tentative = nc->tentative;
3560 rcu_read_unlock();
3418 3561
3419 if (hg == 100 || (hg == -100 && nc->always_asbp)) { 3562 if (hg == 100 || (hg == -100 && always_asbp)) {
3420 int pcount = (device->state.role == R_PRIMARY) 3563 int pcount = (device->state.role == R_PRIMARY)
3421 + (peer_role == R_PRIMARY); 3564 + (peer_role == R_PRIMARY);
3422 int forced = (hg == -100); 3565 int forced = (hg == -100);
@@ -3455,9 +3598,6 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3455 "Sync from %s node\n", 3598 "Sync from %s node\n",
3456 (hg < 0) ? "peer" : "this"); 3599 (hg < 0) ? "peer" : "this");
3457 } 3600 }
3458 rr_conflict = nc->rr_conflict;
3459 tentative = nc->tentative;
3460 rcu_read_unlock();
3461 3601
3462 if (hg == -100) { 3602 if (hg == -100) {
3463 /* FIXME this log message is not correct if we end up here 3603 /* FIXME this log message is not correct if we end up here
@@ -3980,6 +4120,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3980 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; 4120 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
3981 enum determine_dev_size dd = DS_UNCHANGED; 4121 enum determine_dev_size dd = DS_UNCHANGED;
3982 sector_t p_size, p_usize, p_csize, my_usize; 4122 sector_t p_size, p_usize, p_csize, my_usize;
4123 sector_t new_size, cur_size;
3983 int ldsc = 0; /* local disk size changed */ 4124 int ldsc = 0; /* local disk size changed */
3984 enum dds_flags ddsf; 4125 enum dds_flags ddsf;
3985 4126
@@ -3987,6 +4128,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3987 if (!peer_device) 4128 if (!peer_device)
3988 return config_unknown_volume(connection, pi); 4129 return config_unknown_volume(connection, pi);
3989 device = peer_device->device; 4130 device = peer_device->device;
4131 cur_size = drbd_get_capacity(device->this_bdev);
3990 4132
3991 p_size = be64_to_cpu(p->d_size); 4133 p_size = be64_to_cpu(p->d_size);
3992 p_usize = be64_to_cpu(p->u_size); 4134 p_usize = be64_to_cpu(p->u_size);
@@ -3997,7 +4139,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3997 device->p_size = p_size; 4139 device->p_size = p_size;
3998 4140
3999 if (get_ldev(device)) { 4141 if (get_ldev(device)) {
4000 sector_t new_size, cur_size;
4001 rcu_read_lock(); 4142 rcu_read_lock();
4002 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; 4143 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
4003 rcu_read_unlock(); 4144 rcu_read_unlock();
@@ -4012,13 +4153,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
4012 if (device->state.conn == C_WF_REPORT_PARAMS) 4153 if (device->state.conn == C_WF_REPORT_PARAMS)
4013 p_usize = min_not_zero(my_usize, p_usize); 4154 p_usize = min_not_zero(my_usize, p_usize);
4014 4155
4015 /* Never shrink a device with usable data during connect. 4156 /* Never shrink a device with usable data during connect,
4016 But allow online shrinking if we are connected. */ 4157 * or "attach" on the peer.
4158 * But allow online shrinking if we are connected. */
4017 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); 4159 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
4018 cur_size = drbd_get_capacity(device->this_bdev);
4019 if (new_size < cur_size && 4160 if (new_size < cur_size &&
4020 device->state.disk >= D_OUTDATED && 4161 device->state.disk >= D_OUTDATED &&
4021 device->state.conn < C_CONNECTED) { 4162 (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
4022 drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n", 4163 drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
4023 (unsigned long long)new_size, (unsigned long long)cur_size); 4164 (unsigned long long)new_size, (unsigned long long)cur_size);
4024 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 4165 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
@@ -4046,8 +4187,8 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
4046 synchronize_rcu(); 4187 synchronize_rcu();
4047 kfree(old_disk_conf); 4188 kfree(old_disk_conf);
4048 4189
4049 drbd_info(device, "Peer sets u_size to %lu sectors\n", 4190 drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
4050 (unsigned long)my_usize); 4191 (unsigned long)p_usize, (unsigned long)my_usize);
4051 } 4192 }
4052 4193
4053 put_ldev(device); 4194 put_ldev(device);
@@ -4080,9 +4221,36 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
4080 * 4221 *
4081 * However, if he sends a zero current size, 4222 * However, if he sends a zero current size,
4082 * take his (user-capped or) backing disk size anyways. 4223 * take his (user-capped or) backing disk size anyways.
4224 *
4225 * Unless of course he does not have a disk himself.
4226 * In which case we ignore this completely.
4083 */ 4227 */
4228 sector_t new_size = p_csize ?: p_usize ?: p_size;
4084 drbd_reconsider_queue_parameters(device, NULL, o); 4229 drbd_reconsider_queue_parameters(device, NULL, o);
4085 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); 4230 if (new_size == 0) {
4231 /* Ignore, peer does not know nothing. */
4232 } else if (new_size == cur_size) {
4233 /* nothing to do */
4234 } else if (cur_size != 0 && p_size == 0) {
4235 drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
4236 (unsigned long long)new_size, (unsigned long long)cur_size);
4237 } else if (new_size < cur_size && device->state.role == R_PRIMARY) {
4238 drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
4239 (unsigned long long)new_size, (unsigned long long)cur_size);
4240 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4241 return -EIO;
4242 } else {
4243 /* I believe the peer, if
4244 * - I don't have a current size myself
4245 * - we agree on the size anyways
4246 * - I do have a current size, am Secondary,
4247 * and he has the only disk
4248 * - I do have a current size, am Primary,
4249 * and he has the only disk,
4250 * which is larger than my current size
4251 */
4252 drbd_set_my_capacity(device, new_size);
4253 }
4086 } 4254 }
4087 4255
4088 if (get_ldev(device)) { 4256 if (get_ldev(device)) {
@@ -4142,7 +4310,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
4142 kfree(device->p_uuid); 4310 kfree(device->p_uuid);
4143 device->p_uuid = p_uuid; 4311 device->p_uuid = p_uuid;
4144 4312
4145 if (device->state.conn < C_CONNECTED && 4313 if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4146 device->state.disk < D_INCONSISTENT && 4314 device->state.disk < D_INCONSISTENT &&
4147 device->state.role == R_PRIMARY && 4315 device->state.role == R_PRIMARY &&
4148 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 4316 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
@@ -4368,6 +4536,25 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
4368 if (peer_state.conn == C_AHEAD) 4536 if (peer_state.conn == C_AHEAD)
4369 ns.conn = C_BEHIND; 4537 ns.conn = C_BEHIND;
4370 4538
4539 /* TODO:
4540 * if (primary and diskless and peer uuid != effective uuid)
4541 * abort attach on peer;
4542 *
4543 * If this node does not have good data, was already connected, but
4544 * the peer did a late attach only now, trying to "negotiate" with me,
4545 * AND I am currently Primary, possibly frozen, with some specific
4546 * "effective" uuid, this should never be reached, really, because
4547 * we first send the uuids, then the current state.
4548 *
4549 * In this scenario, we already dropped the connection hard
4550 * when we received the unsuitable uuids (receive_uuids().
4551 *
4552 * Should we want to change this, that is: not drop the connection in
4553 * receive_uuids() already, then we would need to add a branch here
4554 * that aborts the attach of "unsuitable uuids" on the peer in case
4555 * this node is currently Diskless Primary.
4556 */
4557
4371 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && 4558 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4372 get_ldev_if_state(device, D_NEGOTIATING)) { 4559 get_ldev_if_state(device, D_NEGOTIATING)) {
4373 int cr; /* consider resync */ 4560 int cr; /* consider resync */
@@ -4380,7 +4567,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
4380 (peer_state.disk == D_NEGOTIATING || 4567 (peer_state.disk == D_NEGOTIATING ||
4381 os.disk == D_NEGOTIATING)); 4568 os.disk == D_NEGOTIATING));
4382 /* if we have both been inconsistent, and the peer has been 4569 /* if we have both been inconsistent, and the peer has been
4383 * forced to be UpToDate with --overwrite-data */ 4570 * forced to be UpToDate with --force */
4384 cr |= test_bit(CONSIDER_RESYNC, &device->flags); 4571 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
4385 /* if we had been plain connected, and the admin requested to 4572 /* if we had been plain connected, and the admin requested to
4386 * start a sync by "invalidate" or "invalidate-remote" */ 4573 * start a sync by "invalidate" or "invalidate-remote" */
@@ -4845,7 +5032,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
4845 5032
4846 peer_req->w.cb = e_end_resync_block; 5033 peer_req->w.cb = e_end_resync_block;
4847 peer_req->submit_jif = jiffies; 5034 peer_req->submit_jif = jiffies;
4848 peer_req->flags |= EE_IS_TRIM; 5035 peer_req->flags |= EE_TRIM;
4849 5036
4850 spin_lock_irq(&device->resource->req_lock); 5037 spin_lock_irq(&device->resource->req_lock);
4851 list_add_tail(&peer_req->w.list, &device->sync_ee); 5038 list_add_tail(&peer_req->w.list, &device->sync_ee);
@@ -4913,6 +5100,7 @@ static struct data_cmd drbd_cmd_handler[] = {
4913 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, 5100 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
4914 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, 5101 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
4915 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, 5102 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
5103 [P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data },
4916 [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, 5104 [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
4917 [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data }, 5105 [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data },
4918}; 5106};
@@ -5197,11 +5385,12 @@ static int drbd_do_features(struct drbd_connection *connection)
5197 drbd_info(connection, "Handshake successful: " 5385 drbd_info(connection, "Handshake successful: "
5198 "Agreed network protocol version %d\n", connection->agreed_pro_version); 5386 "Agreed network protocol version %d\n", connection->agreed_pro_version);
5199 5387
5200 drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n", 5388 drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
5201 connection->agreed_features, 5389 connection->agreed_features,
5202 connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "", 5390 connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
5203 connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "", 5391 connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
5204 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : 5392 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
5393 connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
5205 connection->agreed_features ? "" : " none"); 5394 connection->agreed_features ? "" : " none");
5206 5395
5207 return 1; 5396 return 1;
@@ -5284,7 +5473,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
5284 if (pi.cmd != P_AUTH_CHALLENGE) { 5473 if (pi.cmd != P_AUTH_CHALLENGE) {
5285 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", 5474 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
5286 cmdname(pi.cmd), pi.cmd); 5475 cmdname(pi.cmd), pi.cmd);
5287 rv = 0; 5476 rv = -1;
5288 goto fail; 5477 goto fail;
5289 } 5478 }
5290 5479
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1c4da17e902e..643a04af213b 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -63,7 +63,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
63 drbd_req_make_private_bio(req, bio_src); 63 drbd_req_make_private_bio(req, bio_src);
64 req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) 64 req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
65 | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) 65 | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
66 | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0) 66 | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
67 | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); 67 | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
68 req->device = device; 68 req->device = device;
69 req->master_bio = bio_src; 69 req->master_bio = bio_src;
@@ -1155,12 +1155,11 @@ static int drbd_process_write_request(struct drbd_request *req)
1155 return remote; 1155 return remote;
1156} 1156}
1157 1157
1158static void drbd_process_discard_req(struct drbd_request *req) 1158static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
1159{ 1159{
1160 struct block_device *bdev = req->device->ldev->backing_bdev; 1160 int err = drbd_issue_discard_or_zero_out(req->device,
1161 1161 req->i.sector, req->i.size >> 9, flags);
1162 if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, 1162 if (err)
1163 GFP_NOIO, 0))
1164 req->private_bio->bi_status = BLK_STS_IOERR; 1163 req->private_bio->bi_status = BLK_STS_IOERR;
1165 bio_endio(req->private_bio); 1164 bio_endio(req->private_bio);
1166} 1165}
@@ -1189,9 +1188,11 @@ drbd_submit_req_private_bio(struct drbd_request *req)
1189 if (get_ldev(device)) { 1188 if (get_ldev(device)) {
1190 if (drbd_insert_fault(device, type)) 1189 if (drbd_insert_fault(device, type))
1191 bio_io_error(bio); 1190 bio_io_error(bio);
1192 else if (bio_op(bio) == REQ_OP_WRITE_ZEROES || 1191 else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
1193 bio_op(bio) == REQ_OP_DISCARD) 1192 drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
1194 drbd_process_discard_req(req); 1193 ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
1194 else if (bio_op(bio) == REQ_OP_DISCARD)
1195 drbd_process_discard_or_zeroes_req(req, EE_TRIM);
1195 else 1196 else
1196 generic_make_request(bio); 1197 generic_make_request(bio);
1197 put_ldev(device); 1198 put_ldev(device);
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 94c654020f0f..c2f569d2661b 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -208,6 +208,7 @@ enum drbd_req_state_bits {
208 __RQ_WRITE, 208 __RQ_WRITE,
209 __RQ_WSAME, 209 __RQ_WSAME,
210 __RQ_UNMAP, 210 __RQ_UNMAP,
211 __RQ_ZEROES,
211 212
212 /* Should call drbd_al_complete_io() for this request... */ 213 /* Should call drbd_al_complete_io() for this request... */
213 __RQ_IN_ACT_LOG, 214 __RQ_IN_ACT_LOG,
@@ -253,6 +254,7 @@ enum drbd_req_state_bits {
253#define RQ_WRITE (1UL << __RQ_WRITE) 254#define RQ_WRITE (1UL << __RQ_WRITE)
254#define RQ_WSAME (1UL << __RQ_WSAME) 255#define RQ_WSAME (1UL << __RQ_WSAME)
255#define RQ_UNMAP (1UL << __RQ_UNMAP) 256#define RQ_UNMAP (1UL << __RQ_UNMAP)
257#define RQ_ZEROES (1UL << __RQ_ZEROES)
256#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) 258#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
257#define RQ_UNPLUG (1UL << __RQ_UNPLUG) 259#define RQ_UNPLUG (1UL << __RQ_UNPLUG)
258#define RQ_POSTPONED (1UL << __RQ_POSTPONED) 260#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 0813c654c893..2b4c0db5d867 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -688,11 +688,9 @@ request_detach(struct drbd_device *device)
688 CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); 688 CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
689} 689}
690 690
691enum drbd_state_rv 691int drbd_request_detach_interruptible(struct drbd_device *device)
692drbd_request_detach_interruptible(struct drbd_device *device)
693{ 692{
694 enum drbd_state_rv rv; 693 int ret, rv;
695 int ret;
696 694
697 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ 695 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
698 wait_event_interruptible(device->state_wait, 696 wait_event_interruptible(device->state_wait,
@@ -1124,7 +1122,7 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
1124 ns.pdsk = D_UP_TO_DATE; 1122 ns.pdsk = D_UP_TO_DATE;
1125 } 1123 }
1126 1124
1127 /* Implications of the connection stat on the disk states */ 1125 /* Implications of the connection state on the disk states */
1128 disk_min = D_DISKLESS; 1126 disk_min = D_DISKLESS;
1129 disk_max = D_UP_TO_DATE; 1127 disk_max = D_UP_TO_DATE;
1130 pdsk_min = D_INCONSISTENT; 1128 pdsk_min = D_INCONSISTENT;
@@ -2109,9 +2107,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
2109 spin_unlock_irq(&connection->resource->req_lock); 2107 spin_unlock_irq(&connection->resource->req_lock);
2110 } 2108 }
2111 } 2109 }
2112 kref_put(&connection->kref, drbd_destroy_connection);
2113
2114 conn_md_sync(connection); 2110 conn_md_sync(connection);
2111 kref_put(&connection->kref, drbd_destroy_connection);
2115 2112
2116 return 0; 2113 return 0;
2117} 2114}
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index ea58301d0895..f87371e55e68 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -131,7 +131,7 @@ extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state
131 enum chg_state_flags, 131 enum chg_state_flags,
132 struct completion *done); 132 struct completion *done);
133extern void print_st_err(struct drbd_device *, union drbd_state, 133extern void print_st_err(struct drbd_device *, union drbd_state,
134 union drbd_state, int); 134 union drbd_state, enum drbd_state_rv);
135 135
136enum drbd_state_rv 136enum drbd_state_rv
137_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, 137_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
@@ -162,8 +162,7 @@ static inline int drbd_request_state(struct drbd_device *device,
162} 162}
163 163
164/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ 164/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
165enum drbd_state_rv 165int drbd_request_detach_interruptible(struct drbd_device *device);
166drbd_request_detach_interruptible(struct drbd_device *device);
167 166
168enum drbd_role conn_highest_role(struct drbd_connection *connection); 167enum drbd_role conn_highest_role(struct drbd_connection *connection);
169enum drbd_role conn_highest_peer(struct drbd_connection *connection); 168enum drbd_role conn_highest_peer(struct drbd_connection *connection);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 99255d0c9e2f..268ef0c5d4ab 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -153,7 +153,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
153 do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 153 do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
154 154
155 /* FIXME do we want to detach for failed REQ_OP_DISCARD? 155 /* FIXME do we want to detach for failed REQ_OP_DISCARD?
156 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ 156 * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
157 if (peer_req->flags & EE_WAS_ERROR) 157 if (peer_req->flags & EE_WAS_ERROR)
158 __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 158 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
159 159
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0939f36548c9..b8a0720d3653 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -616,7 +616,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
616 default: 616 default:
617 WARN_ON_ONCE(1); 617 WARN_ON_ONCE(1);
618 return -EIO; 618 return -EIO;
619 break;
620 } 619 }
621} 620}
622 621
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 9c0553dd13e7..0ff27e2d98c4 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -633,7 +633,6 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
633 case VD_OP_GET_EFI: 633 case VD_OP_GET_EFI:
634 case VD_OP_SET_EFI: 634 case VD_OP_SET_EFI:
635 return -EOPNOTSUPP; 635 return -EOPNOTSUPP;
636 break;
637 }; 636 };
638 637
639 map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; 638 map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index c1c676a33e4a..1e2ae90d7715 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -995,7 +995,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
995 struct swim3 __iomem *sw = fs->swim3; 995 struct swim3 __iomem *sw = fs->swim3;
996 996
997 mutex_lock(&swim3_mutex); 997 mutex_lock(&swim3_mutex);
998 if (fs->ref_count > 0 && --fs->ref_count == 0) { 998 if (fs->ref_count > 0)
999 --fs->ref_count;
1000 else if (fs->ref_count == -1)
1001 fs->ref_count = 0;
1002 if (fs->ref_count == 0) {
999 swim3_action(fs, MOTOR_OFF); 1003 swim3_action(fs, MOTOR_OFF);
1000 out_8(&sw->control_bic, 0xff); 1004 out_8(&sw->control_bic, 0xff);
1001 swim3_select(fs, RELAX); 1005 swim3_select(fs, RELAX);
@@ -1087,8 +1091,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
1087 struct floppy_state *fs = &floppy_states[index]; 1091 struct floppy_state *fs = &floppy_states[index];
1088 int rc = -EBUSY; 1092 int rc = -EBUSY;
1089 1093
1090 /* Do this first for message macros */
1091 memset(fs, 0, sizeof(*fs));
1092 fs->mdev = mdev; 1094 fs->mdev = mdev;
1093 fs->index = index; 1095 fs->index = index;
1094 1096
@@ -1151,7 +1153,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
1151 swim3_err("%s", "Couldn't request interrupt\n"); 1153 swim3_err("%s", "Couldn't request interrupt\n");
1152 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); 1154 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
1153 goto out_unmap; 1155 goto out_unmap;
1154 return -EBUSY;
1155 } 1156 }
1156 1157
1157 timer_setup(&fs->timeout, NULL, 0); 1158 timer_setup(&fs->timeout, NULL, 0);
@@ -1188,14 +1189,15 @@ static int swim3_attach(struct macio_dev *mdev,
1188 return rc; 1189 return rc;
1189 } 1190 }
1190 1191
1191 fs = &floppy_states[floppy_count];
1192
1193 disk = alloc_disk(1); 1192 disk = alloc_disk(1);
1194 if (disk == NULL) { 1193 if (disk == NULL) {
1195 rc = -ENOMEM; 1194 rc = -ENOMEM;
1196 goto out_unregister; 1195 goto out_unregister;
1197 } 1196 }
1198 1197
1198 fs = &floppy_states[floppy_count];
1199 memset(fs, 0, sizeof(*fs));
1200
1199 disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2, 1201 disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2,
1200 BLK_MQ_F_SHOULD_MERGE); 1202 BLK_MQ_F_SHOULD_MERGE);
1201 if (IS_ERR(disk->queue)) { 1203 if (IS_ERR(disk->queue)) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index a5b8afe3609c..f8b7345fe1cb 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -873,6 +873,7 @@ static void __exit exit_gdrom(void)
873 platform_device_unregister(pd); 873 platform_device_unregister(pd);
874 platform_driver_unregister(&gdrom_driver); 874 platform_driver_unregister(&gdrom_driver);
875 kfree(gd.toc); 875 kfree(gd.toc);
876 kfree(gd.cd_info);
876} 877}
877 878
878module_init(init_gdrom); 879module_init(init_gdrom);
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 3fcf062d752c..5ee20da7bdb3 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -418,7 +418,6 @@ retry_rq:
418 if (ret) { 418 if (ret) {
419 pblk_err(pblk, "I/O submission failed: %d\n", ret); 419 pblk_err(pblk, "I/O submission failed: %d\n", ret);
420 bio_put(bio); 420 bio_put(bio);
421 bio_put(bio);
422 return ret; 421 return ret;
423 } 422 }
424 423
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index f025fd1e22e6..76c61318fda5 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -499,22 +499,33 @@ static inline void blkg_get(struct blkcg_gq *blkg)
499 */ 499 */
500static inline bool blkg_tryget(struct blkcg_gq *blkg) 500static inline bool blkg_tryget(struct blkcg_gq *blkg)
501{ 501{
502 return percpu_ref_tryget(&blkg->refcnt); 502 return blkg && percpu_ref_tryget(&blkg->refcnt);
503} 503}
504 504
505/** 505/**
506 * blkg_tryget_closest - try and get a blkg ref on the closest blkg 506 * blkg_tryget_closest - try and get a blkg ref on the closest blkg
507 * @blkg: blkg to get 507 * @blkg: blkg to get
508 * 508 *
509 * This walks up the blkg tree to find the closest non-dying blkg and returns 509 * This needs to be called rcu protected. As the failure mode here is to walk
510 * the blkg that it did association with as it may not be the passed in blkg. 510 * up the blkg tree, this ensures that the blkg->parent pointers are always
511 * valid. This returns the blkg that it ended up taking a reference on or %NULL
512 * if no reference was taken.
511 */ 513 */
512static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) 514static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
513{ 515{
514 while (blkg && !percpu_ref_tryget(&blkg->refcnt)) 516 struct blkcg_gq *ret_blkg = NULL;
517
518 WARN_ON_ONCE(!rcu_read_lock_held());
519
520 while (blkg) {
521 if (blkg_tryget(blkg)) {
522 ret_blkg = blkg;
523 break;
524 }
515 blkg = blkg->parent; 525 blkg = blkg->parent;
526 }
516 527
517 return blkg; 528 return ret_blkg;
518} 529}
519 530
520/** 531/**
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 2d0259327721..a19d98367f08 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -51,7 +51,7 @@
51#endif 51#endif
52 52
53extern const char *drbd_buildtag(void); 53extern const char *drbd_buildtag(void);
54#define REL_VERSION "8.4.10" 54#define REL_VERSION "8.4.11"
55#define API_VERSION 1 55#define API_VERSION 1
56#define PRO_VERSION_MIN 86 56#define PRO_VERSION_MIN 86
57#define PRO_VERSION_MAX 101 57#define PRO_VERSION_MAX 101
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 5972e4969197..eeae59d3ceb7 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -191,6 +191,7 @@ static inline void ct_assert_unique_operations(void)
191{ 191{
192 switch (0) { 192 switch (0) {
193#include GENL_MAGIC_INCLUDE_FILE 193#include GENL_MAGIC_INCLUDE_FILE
194 case 0:
194 ; 195 ;
195 } 196 }
196} 197}
@@ -209,6 +210,7 @@ static inline void ct_assert_unique_top_level_attributes(void)
209{ 210{
210 switch (0) { 211 switch (0) {
211#include GENL_MAGIC_INCLUDE_FILE 212#include GENL_MAGIC_INCLUDE_FILE
213 case 0:
212 ; 214 ;
213 } 215 }
214} 216}
@@ -218,7 +220,8 @@ static inline void ct_assert_unique_top_level_attributes(void)
218static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ 220static inline void ct_assert_unique_ ## s_name ## _attributes(void) \
219{ \ 221{ \
220 switch (0) { \ 222 switch (0) { \
221 s_fields \ 223 s_fields \
224 case 0: \
222 ; \ 225 ; \
223 } \ 226 } \
224} 227}