aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-06-04 18:37:44 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-06-04 18:37:44 -0400
commitd2dd328b7f7bc6cebe167648289337755944ad2a (patch)
tree5d664a2db1ac209f7537452ddc02597972f7aa37 /drivers/block
parentc1518f12bab97a6d409a25aaccb02dc8895800f3 (diff)
parent1abec4fdbb142e3ccb6ce99832fae42129134a96 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits) block: make blk_init_free_list and elevator_init idempotent block: avoid unconditionally freeing previously allocated request_queue pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface pipe: change the privilege required for growing a pipe beyond system max pipe: adjust minimum pipe size to 1 page block: disable preemption before using sched_clock() cciss: call BUG() earlier Preparing 8.3.8rc2 drbd: Reduce verbosity drbd: use drbd specific ratelimit instead of global printk_ratelimit drbd: fix hang on local read errors while disconnected drbd: Removed the now empty w_io_error() function drbd: removed duplicated #includes drbd: improve usage of MSG_MORE drbd: need to set socket bufsize early to take effect drbd: improve network latency, TCP_QUICKACK drbd: Revert "drbd: Create new current UUID as late as possible" brd: support discard Revert "writeback: fix WB_SYNC_NONE writeback from umount" Revert "writeback: ensure that WB_SYNC_NONE writeback with sb pinned is sync" ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/brd.c53
-rw-r--r--drivers/block/cciss_scsi.c2
-rw-r--r--drivers/block/drbd/drbd_int.h14
-rw-r--r--drivers/block/drbd/drbd_main.c68
-rw-r--r--drivers/block/drbd/drbd_receiver.c45
-rw-r--r--drivers/block/drbd/drbd_req.c54
-rw-r--r--drivers/block/drbd/drbd_req.h1
-rw-r--r--drivers/block/drbd/drbd_worker.c24
8 files changed, 122 insertions, 139 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 6081e81d5738..f1bf79d9bc0a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
133 return page; 133 return page;
134} 134}
135 135
136static void brd_free_page(struct brd_device *brd, sector_t sector)
137{
138 struct page *page;
139 pgoff_t idx;
140
141 spin_lock(&brd->brd_lock);
142 idx = sector >> PAGE_SECTORS_SHIFT;
143 page = radix_tree_delete(&brd->brd_pages, idx);
144 spin_unlock(&brd->brd_lock);
145 if (page)
146 __free_page(page);
147}
148
149static void brd_zero_page(struct brd_device *brd, sector_t sector)
150{
151 struct page *page;
152
153 page = brd_lookup_page(brd, sector);
154 if (page)
155 clear_highpage(page);
156}
157
136/* 158/*
137 * Free all backing store pages and radix tree. This must only be called when 159 * Free all backing store pages and radix tree. This must only be called when
138 * there are no other users of the device. 160 * there are no other users of the device.
@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
189 return 0; 211 return 0;
190} 212}
191 213
214static void discard_from_brd(struct brd_device *brd,
215 sector_t sector, size_t n)
216{
217 while (n >= PAGE_SIZE) {
218 /*
219 * Don't want to actually discard pages here because
220 * re-allocating the pages can result in writeback
221 * deadlocks under heavy load.
222 */
223 if (0)
224 brd_free_page(brd, sector);
225 else
226 brd_zero_page(brd, sector);
227 sector += PAGE_SIZE >> SECTOR_SHIFT;
228 n -= PAGE_SIZE;
229 }
230}
231
192/* 232/*
193 * Copy n bytes from src to the brd starting at sector. Does not sleep. 233 * Copy n bytes from src to the brd starting at sector. Does not sleep.
194 */ 234 */
@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
300 get_capacity(bdev->bd_disk)) 340 get_capacity(bdev->bd_disk))
301 goto out; 341 goto out;
302 342
343 if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
344 err = 0;
345 discard_from_brd(brd, sector, bio->bi_size);
346 goto out;
347 }
348
303 rw = bio_rw(bio); 349 rw = bio_rw(bio);
304 if (rw == READA) 350 if (rw == READA)
305 rw = READ; 351 rw = READ;
@@ -320,7 +366,7 @@ out:
320} 366}
321 367
322#ifdef CONFIG_BLK_DEV_XIP 368#ifdef CONFIG_BLK_DEV_XIP
323static int brd_direct_access (struct block_device *bdev, sector_t sector, 369static int brd_direct_access(struct block_device *bdev, sector_t sector,
324 void **kaddr, unsigned long *pfn) 370 void **kaddr, unsigned long *pfn)
325{ 371{
326 struct brd_device *brd = bdev->bd_disk->private_data; 372 struct brd_device *brd = bdev->bd_disk->private_data;
@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i)
437 blk_queue_max_hw_sectors(brd->brd_queue, 1024); 483 blk_queue_max_hw_sectors(brd->brd_queue, 1024);
438 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); 484 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
439 485
486 brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
487 brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
488 brd->brd_queue->limits.discard_zeroes_data = 1;
489 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
490
440 disk = brd->brd_disk = alloc_disk(1 << part_shift); 491 disk = brd->brd_disk = alloc_disk(1 << part_shift);
441 if (!disk) 492 if (!disk)
442 goto out_free_queue; 493 goto out_free_queue;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1d0e2cfec72..3381505c8a6c 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
188 188
189 sa = h->scsi_ctlr; 189 sa = h->scsi_ctlr;
190 stk = &sa->cmd_stack; 190 stk = &sa->cmd_stack;
191 stk->top++;
191 if (stk->top >= CMD_STACK_SIZE) { 192 if (stk->top >= CMD_STACK_SIZE) {
192 printk("cciss: scsi_cmd_free called too many times.\n"); 193 printk("cciss: scsi_cmd_free called too many times.\n");
193 BUG(); 194 BUG();
194 } 195 }
195 stk->top++;
196 stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd; 196 stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
197} 197}
198 198
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e9654c8d5b62..485ed8c7d623 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -943,8 +943,7 @@ struct drbd_conf {
943 struct drbd_work resync_work, 943 struct drbd_work resync_work,
944 unplug_work, 944 unplug_work,
945 md_sync_work, 945 md_sync_work,
946 delay_probe_work, 946 delay_probe_work;
947 uuid_work;
948 struct timer_list resync_timer; 947 struct timer_list resync_timer;
949 struct timer_list md_sync_timer; 948 struct timer_list md_sync_timer;
950 struct timer_list delay_probe_timer; 949 struct timer_list delay_probe_timer;
@@ -1069,7 +1068,6 @@ struct drbd_conf {
1069 struct timeval dps_time; /* delay-probes-start-time */ 1068 struct timeval dps_time; /* delay-probes-start-time */
1070 unsigned int dp_volume_last; /* send_cnt of last delay probe */ 1069 unsigned int dp_volume_last; /* send_cnt of last delay probe */
1071 int c_sync_rate; /* current resync rate after delay_probe magic */ 1070 int c_sync_rate; /* current resync rate after delay_probe magic */
1072 atomic_t new_c_uuid;
1073}; 1071};
1074 1072
1075static inline struct drbd_conf *minor_to_mdev(unsigned int minor) 1073static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
1476extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); 1474extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
1477extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); 1475extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
1478extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); 1476extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
1479extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
1480extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); 1477extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
1481extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); 1478extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
1482extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); 1479extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
@@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock)
1542 1539
1543static inline void drbd_tcp_quickack(struct socket *sock) 1540static inline void drbd_tcp_quickack(struct socket *sock)
1544{ 1541{
1545 int __user val = 1; 1542 int __user val = 2;
1546 (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, 1543 (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
1547 (char __user *)&val, sizeof(val)); 1544 (char __user *)&val, sizeof(val));
1548} 1545}
@@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
1728 switch (mdev->ldev->dc.on_io_error) { 1725 switch (mdev->ldev->dc.on_io_error) {
1729 case EP_PASS_ON: 1726 case EP_PASS_ON:
1730 if (!forcedetach) { 1727 if (!forcedetach) {
1731 if (printk_ratelimit()) 1728 if (__ratelimit(&drbd_ratelimit_state))
1732 dev_err(DEV, "Local IO failed in %s." 1729 dev_err(DEV, "Local IO failed in %s."
1733 "Passing error on...\n", where); 1730 "Passing error on...\n", where);
1734 break; 1731 break;
@@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
2219 return 0; 2216 return 0;
2220 if (test_bit(BITMAP_IO, &mdev->flags)) 2217 if (test_bit(BITMAP_IO, &mdev->flags))
2221 return 0; 2218 return 0;
2222 if (atomic_read(&mdev->new_c_uuid))
2223 return 0;
2224 return 1; 2219 return 1;
2225} 2220}
2226 2221
@@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
2241 * to avoid races with the reconnect code, 2236 * to avoid races with the reconnect code,
2242 * we need to atomic_inc within the spinlock. */ 2237 * we need to atomic_inc within the spinlock. */
2243 2238
2244 if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1))
2245 drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work);
2246
2247 spin_lock_irq(&mdev->req_lock); 2239 spin_lock_irq(&mdev->req_lock);
2248 while (!__inc_ap_bio_cond(mdev)) { 2240 while (!__inc_ap_bio_cond(mdev)) {
2249 prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); 2241 prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index be2d2da9cdba..6b077f93acc6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1215 ns.pdsk == D_OUTDATED)) { 1215 ns.pdsk == D_OUTDATED)) {
1216 if (get_ldev(mdev)) { 1216 if (get_ldev(mdev)) {
1217 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && 1217 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
1218 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && 1218 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1219 !atomic_read(&mdev->new_c_uuid)) 1219 drbd_uuid_new_current(mdev);
1220 atomic_set(&mdev->new_c_uuid, 2); 1220 drbd_send_uuids(mdev);
1221 }
1221 put_ldev(mdev); 1222 put_ldev(mdev);
1222 } 1223 }
1223 } 1224 }
1224 1225
1225 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { 1226 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
1226 /* Diskless peer becomes primary or got connected do diskless, primary peer. */ 1227 if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
1227 if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && 1228 drbd_uuid_new_current(mdev);
1228 !atomic_read(&mdev->new_c_uuid))
1229 atomic_set(&mdev->new_c_uuid, 2);
1230 1229
1231 /* D_DISKLESS Peer becomes secondary */ 1230 /* D_DISKLESS Peer becomes secondary */
1232 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) 1231 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1350 drbd_md_sync(mdev); 1349 drbd_md_sync(mdev);
1351} 1350}
1352 1351
1353static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1354{
1355 if (get_ldev(mdev)) {
1356 if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
1357 drbd_uuid_new_current(mdev);
1358 if (get_net_conf(mdev)) {
1359 drbd_send_uuids(mdev);
1360 put_net_conf(mdev);
1361 }
1362 drbd_md_sync(mdev);
1363 }
1364 put_ldev(mdev);
1365 }
1366 atomic_dec(&mdev->new_c_uuid);
1367 wake_up(&mdev->misc_wait);
1368
1369 return 1;
1370}
1371 1352
1372static int drbd_thread_setup(void *arg) 1353static int drbd_thread_setup(void *arg)
1373{ 1354{
@@ -2291,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
2291 * with page_count == 0 or PageSlab. 2272 * with page_count == 0 or PageSlab.
2292 */ 2273 */
2293static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, 2274static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
2294 int offset, size_t size) 2275 int offset, size_t size, unsigned msg_flags)
2295{ 2276{
2296 int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); 2277 int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
2297 kunmap(page); 2278 kunmap(page);
2298 if (sent == size) 2279 if (sent == size)
2299 mdev->send_cnt += size>>9; 2280 mdev->send_cnt += size>>9;
@@ -2301,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
2301} 2282}
2302 2283
2303static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, 2284static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
2304 int offset, size_t size) 2285 int offset, size_t size, unsigned msg_flags)
2305{ 2286{
2306 mm_segment_t oldfs = get_fs(); 2287 mm_segment_t oldfs = get_fs();
2307 int sent, ok; 2288 int sent, ok;
@@ -2314,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
2314 * __page_cache_release a page that would actually still be referenced 2295 * __page_cache_release a page that would actually still be referenced
2315 * by someone, leading to some obscure delayed Oops somewhere else. */ 2296 * by someone, leading to some obscure delayed Oops somewhere else. */
2316 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) 2297 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
2317 return _drbd_no_send_page(mdev, page, offset, size); 2298 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
2318 2299
2300 msg_flags |= MSG_NOSIGNAL;
2319 drbd_update_congested(mdev); 2301 drbd_update_congested(mdev);
2320 set_fs(KERNEL_DS); 2302 set_fs(KERNEL_DS);
2321 do { 2303 do {
2322 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, 2304 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
2323 offset, len, 2305 offset, len,
2324 MSG_NOSIGNAL); 2306 msg_flags);
2325 if (sent == -EAGAIN) { 2307 if (sent == -EAGAIN) {
2326 if (we_should_drop_the_connection(mdev, 2308 if (we_should_drop_the_connection(mdev,
2327 mdev->data.socket)) 2309 mdev->data.socket))
@@ -2350,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
2350{ 2332{
2351 struct bio_vec *bvec; 2333 struct bio_vec *bvec;
2352 int i; 2334 int i;
2335 /* hint all but last page with MSG_MORE */
2353 __bio_for_each_segment(bvec, bio, i, 0) { 2336 __bio_for_each_segment(bvec, bio, i, 0) {
2354 if (!_drbd_no_send_page(mdev, bvec->bv_page, 2337 if (!_drbd_no_send_page(mdev, bvec->bv_page,
2355 bvec->bv_offset, bvec->bv_len)) 2338 bvec->bv_offset, bvec->bv_len,
2339 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
2356 return 0; 2340 return 0;
2357 } 2341 }
2358 return 1; 2342 return 1;
@@ -2362,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2362{ 2346{
2363 struct bio_vec *bvec; 2347 struct bio_vec *bvec;
2364 int i; 2348 int i;
2349 /* hint all but last page with MSG_MORE */
2365 __bio_for_each_segment(bvec, bio, i, 0) { 2350 __bio_for_each_segment(bvec, bio, i, 0) {
2366 if (!_drbd_send_page(mdev, bvec->bv_page, 2351 if (!_drbd_send_page(mdev, bvec->bv_page,
2367 bvec->bv_offset, bvec->bv_len)) 2352 bvec->bv_offset, bvec->bv_len,
2353 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
2368 return 0; 2354 return 0;
2369 } 2355 }
2370
2371 return 1; 2356 return 1;
2372} 2357}
2373 2358
@@ -2375,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
2375{ 2360{
2376 struct page *page = e->pages; 2361 struct page *page = e->pages;
2377 unsigned len = e->size; 2362 unsigned len = e->size;
2363 /* hint all but last page with MSG_MORE */
2378 page_chain_for_each(page) { 2364 page_chain_for_each(page) {
2379 unsigned l = min_t(unsigned, len, PAGE_SIZE); 2365 unsigned l = min_t(unsigned, len, PAGE_SIZE);
2380 if (!_drbd_send_page(mdev, page, 0, l)) 2366 if (!_drbd_send_page(mdev, page, 0, l,
2367 page_chain_next(page) ? MSG_MORE : 0))
2381 return 0; 2368 return 0;
2382 len -= l; 2369 len -= l;
2383 } 2370 }
@@ -2457,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2457 p.dp_flags = cpu_to_be32(dp_flags); 2444 p.dp_flags = cpu_to_be32(dp_flags);
2458 set_bit(UNPLUG_REMOTE, &mdev->flags); 2445 set_bit(UNPLUG_REMOTE, &mdev->flags);
2459 ok = (sizeof(p) == 2446 ok = (sizeof(p) ==
2460 drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); 2447 drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
2461 if (ok && dgs) { 2448 if (ok && dgs) {
2462 dgb = mdev->int_dig_out; 2449 dgb = mdev->int_dig_out;
2463 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); 2450 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
2464 ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); 2451 ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
2465 } 2452 }
2466 if (ok) { 2453 if (ok) {
2467 if (mdev->net_conf->wire_protocol == DRBD_PROT_A) 2454 if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
@@ -2510,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2510 return 0; 2497 return 0;
2511 2498
2512 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, 2499 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
2513 sizeof(p), MSG_MORE); 2500 sizeof(p), dgs ? MSG_MORE : 0);
2514 if (ok && dgs) { 2501 if (ok && dgs) {
2515 dgb = mdev->int_dig_out; 2502 dgb = mdev->int_dig_out;
2516 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); 2503 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
2517 ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); 2504 ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
2518 } 2505 }
2519 if (ok) 2506 if (ok)
2520 ok = _drbd_send_zc_ee(mdev, e); 2507 ok = _drbd_send_zc_ee(mdev, e);
@@ -2708,7 +2695,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2708 atomic_set(&mdev->net_cnt, 0); 2695 atomic_set(&mdev->net_cnt, 0);
2709 atomic_set(&mdev->packet_seq, 0); 2696 atomic_set(&mdev->packet_seq, 0);
2710 atomic_set(&mdev->pp_in_use, 0); 2697 atomic_set(&mdev->pp_in_use, 0);
2711 atomic_set(&mdev->new_c_uuid, 0);
2712 2698
2713 mutex_init(&mdev->md_io_mutex); 2699 mutex_init(&mdev->md_io_mutex);
2714 mutex_init(&mdev->data.mutex); 2700 mutex_init(&mdev->data.mutex);
@@ -2739,14 +2725,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2739 INIT_LIST_HEAD(&mdev->bm_io_work.w.list); 2725 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
2740 INIT_LIST_HEAD(&mdev->delay_probes); 2726 INIT_LIST_HEAD(&mdev->delay_probes);
2741 INIT_LIST_HEAD(&mdev->delay_probe_work.list); 2727 INIT_LIST_HEAD(&mdev->delay_probe_work.list);
2742 INIT_LIST_HEAD(&mdev->uuid_work.list);
2743 2728
2744 mdev->resync_work.cb = w_resync_inactive; 2729 mdev->resync_work.cb = w_resync_inactive;
2745 mdev->unplug_work.cb = w_send_write_hint; 2730 mdev->unplug_work.cb = w_send_write_hint;
2746 mdev->md_sync_work.cb = w_md_sync; 2731 mdev->md_sync_work.cb = w_md_sync;
2747 mdev->bm_io_work.w.cb = w_bitmap_io; 2732 mdev->bm_io_work.w.cb = w_bitmap_io;
2748 mdev->delay_probe_work.cb = w_delay_probes; 2733 mdev->delay_probe_work.cb = w_delay_probes;
2749 mdev->uuid_work.cb = w_new_current_uuid;
2750 init_timer(&mdev->resync_timer); 2734 init_timer(&mdev->resync_timer);
2751 init_timer(&mdev->md_sync_timer); 2735 init_timer(&mdev->md_sync_timer);
2752 init_timer(&mdev->delay_probe_timer); 2736 init_timer(&mdev->delay_probe_timer);
@@ -3799,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
3799 if (ret) { 3783 if (ret) {
3800 fault_count++; 3784 fault_count++;
3801 3785
3802 if (printk_ratelimit()) 3786 if (__ratelimit(&drbd_ratelimit_state))
3803 dev_warn(DEV, "***Simulating %s failure\n", 3787 dev_warn(DEV, "***Simulating %s failure\n",
3804 _drbd_fault_str(type)); 3788 _drbd_fault_str(type));
3805 } 3789 }
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index bc9ab7fb2cc7..dff48701b84d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -42,7 +42,6 @@
42#include <linux/unistd.h> 42#include <linux/unistd.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/random.h> 44#include <linux/random.h>
45#include <linux/mm.h>
46#include <linux/string.h> 45#include <linux/string.h>
47#include <linux/scatterlist.h> 46#include <linux/scatterlist.h>
48#include "drbd_int.h" 47#include "drbd_int.h"
@@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
571 return rv; 570 return rv;
572} 571}
573 572
573/* quoting tcp(7):
574 * On individual connections, the socket buffer size must be set prior to the
575 * listen(2) or connect(2) calls in order to have it take effect.
576 * This is our wrapper to do so.
577 */
578static void drbd_setbufsize(struct socket *sock, unsigned int snd,
579 unsigned int rcv)
580{
581 /* open coded SO_SNDBUF, SO_RCVBUF */
582 if (snd) {
583 sock->sk->sk_sndbuf = snd;
584 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
585 }
586 if (rcv) {
587 sock->sk->sk_rcvbuf = rcv;
588 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
589 }
590}
591
574static struct socket *drbd_try_connect(struct drbd_conf *mdev) 592static struct socket *drbd_try_connect(struct drbd_conf *mdev)
575{ 593{
576 const char *what; 594 const char *what;
@@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
592 610
593 sock->sk->sk_rcvtimeo = 611 sock->sk->sk_rcvtimeo =
594 sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; 612 sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
613 drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
614 mdev->net_conf->rcvbuf_size);
595 615
596 /* explicitly bind to the configured IP as source IP 616 /* explicitly bind to the configured IP as source IP
597 * for the outgoing connections. 617 * for the outgoing connections.
@@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
670 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ 690 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
671 s_listen->sk->sk_rcvtimeo = timeo; 691 s_listen->sk->sk_rcvtimeo = timeo;
672 s_listen->sk->sk_sndtimeo = timeo; 692 s_listen->sk->sk_sndtimeo = timeo;
693 drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
694 mdev->net_conf->rcvbuf_size);
673 695
674 what = "bind before listen"; 696 what = "bind before listen";
675 err = s_listen->ops->bind(s_listen, 697 err = s_listen->ops->bind(s_listen,
@@ -856,16 +878,6 @@ retry:
856 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 878 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
857 msock->sk->sk_priority = TC_PRIO_INTERACTIVE; 879 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
858 880
859 if (mdev->net_conf->sndbuf_size) {
860 sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
861 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
862 }
863
864 if (mdev->net_conf->rcvbuf_size) {
865 sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
866 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
867 }
868
869 /* NOT YET ... 881 /* NOT YET ...
870 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; 882 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
871 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 883 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1154 unsigned n_bios = 0; 1166 unsigned n_bios = 0;
1155 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; 1167 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
1156 1168
1157 if (atomic_read(&mdev->new_c_uuid)) {
1158 if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) {
1159 drbd_uuid_new_current(mdev);
1160 drbd_md_sync(mdev);
1161
1162 atomic_dec(&mdev->new_c_uuid);
1163 wake_up(&mdev->misc_wait);
1164 }
1165 wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));
1166 }
1167
1168 /* In most cases, we will only need one bio. But in case the lower 1169 /* In most cases, we will only need one bio. But in case the lower
1169 * level restrictions happen to be different at this offset on this 1170 * level restrictions happen to be different at this offset on this
1170 * side than those of the sending peer, we may need to submit the 1171 * side than those of the sending peer, we may need to submit the
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3397f11d0ba9..654f1ef5cbb0 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
102 } 102 }
103 } 103 }
104 104
105 /* if it was a local io error, we want to notify our 105 drbd_req_free(req);
106 * peer about that, and see if we need to
107 * detach the disk and stuff.
108 * to avoid allocating some special work
109 * struct, reuse the request. */
110
111 /* THINK
112 * why do we do this not when we detect the error,
113 * but delay it until it is "done", i.e. possibly
114 * until the next barrier ack? */
115
116 if (rw == WRITE &&
117 ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
118 if (!(req->w.list.next == LIST_POISON1 ||
119 list_empty(&req->w.list))) {
120 /* DEBUG ASSERT only; if this triggers, we
121 * probably corrupt the worker list here */
122 dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
123 dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
124 }
125 req->w.cb = w_io_error;
126 drbd_queue_work(&mdev->data.work, &req->w);
127 /* drbd_req_free() is done in w_io_error */
128 } else {
129 drbd_req_free(req);
130 }
131} 106}
132 107
133static void queue_barrier(struct drbd_conf *mdev) 108static void queue_barrier(struct drbd_conf *mdev)
@@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
453 req->rq_state |= RQ_LOCAL_COMPLETED; 428 req->rq_state |= RQ_LOCAL_COMPLETED;
454 req->rq_state &= ~RQ_LOCAL_PENDING; 429 req->rq_state &= ~RQ_LOCAL_PENDING;
455 430
456 dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
457 (unsigned long long)req->sector, req->size);
458 /* and now: check how to handle local io error. */
459 __drbd_chk_io_error(mdev, FALSE); 431 __drbd_chk_io_error(mdev, FALSE);
460 _req_may_be_done(req, m); 432 _req_may_be_done(req, m);
461 put_ldev(mdev); 433 put_ldev(mdev);
@@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
475 req->rq_state |= RQ_LOCAL_COMPLETED; 447 req->rq_state |= RQ_LOCAL_COMPLETED;
476 req->rq_state &= ~RQ_LOCAL_PENDING; 448 req->rq_state &= ~RQ_LOCAL_PENDING;
477 449
478 dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
479 (unsigned long long)req->sector, req->size);
480 /* _req_mod(req,to_be_send); oops, recursion... */
481 D_ASSERT(!(req->rq_state & RQ_NET_MASK)); 450 D_ASSERT(!(req->rq_state & RQ_NET_MASK));
482 req->rq_state |= RQ_NET_PENDING;
483 inc_ap_pending(mdev);
484 451
485 __drbd_chk_io_error(mdev, FALSE); 452 __drbd_chk_io_error(mdev, FALSE);
486 put_ldev(mdev); 453 put_ldev(mdev);
487 /* NOTE: if we have no connection,
488 * or know the peer has no good data either,
489 * then we don't actually need to "queue_for_net_read",
490 * but we do so anyways, since the drbd_io_error()
491 * and the potential state change to "Diskless"
492 * needs to be done from process context */
493 454
455 /* no point in retrying if there is no good remote data,
456 * or we have no connection. */
457 if (mdev->state.pdsk != D_UP_TO_DATE) {
458 _req_may_be_done(req, m);
459 break;
460 }
461
462 /* _req_mod(req,to_be_send); oops, recursion... */
463 req->rq_state |= RQ_NET_PENDING;
464 inc_ap_pending(mdev);
494 /* fall through: _req_mod(req,queue_for_net_read); */ 465 /* fall through: _req_mod(req,queue_for_net_read); */
495 466
496 case queue_for_net_read: 467 case queue_for_net_read:
@@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
600 _req_may_be_done(req, m); 571 _req_may_be_done(req, m);
601 break; 572 break;
602 573
574 case read_retry_remote_canceled:
575 req->rq_state &= ~RQ_NET_QUEUED;
576 /* fall through, in case we raced with drbd_disconnect */
603 case connection_lost_while_pending: 577 case connection_lost_while_pending:
604 /* transfer log cleanup after connection loss */ 578 /* transfer log cleanup after connection loss */
605 /* assert something? */ 579 /* assert something? */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 16119d7056cc..02d575d24518 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -91,6 +91,7 @@ enum drbd_req_event {
91 send_failed, 91 send_failed,
92 handed_over_to_network, 92 handed_over_to_network,
93 connection_lost_while_pending, 93 connection_lost_while_pending,
94 read_retry_remote_canceled,
94 recv_acked_by_peer, 95 recv_acked_by_peer,
95 write_acked_by_peer, 96 write_acked_by_peer,
96 write_acked_by_peer_and_sis, /* and set_in_sync */ 97 write_acked_by_peer_and_sis, /* and set_in_sync */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 727ff6339754..b623ceee2a4a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error)
224 enum drbd_req_event what; 224 enum drbd_req_event what;
225 int uptodate = bio_flagged(bio, BIO_UPTODATE); 225 int uptodate = bio_flagged(bio, BIO_UPTODATE);
226 226
227 if (error)
228 dev_warn(DEV, "p %s: error=%d\n",
229 bio_data_dir(bio) == WRITE ? "write" : "read", error);
230 if (!error && !uptodate) { 227 if (!error && !uptodate) {
231 dev_warn(DEV, "p %s: setting error to -EIO\n", 228 dev_warn(DEV, "p %s: setting error to -EIO\n",
232 bio_data_dir(bio) == WRITE ? "write" : "read"); 229 bio_data_dir(bio) == WRITE ? "write" : "read");
@@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error)
257 complete_master_bio(mdev, &m); 254 complete_master_bio(mdev, &m);
258} 255}
259 256
260int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
261{
262 struct drbd_request *req = container_of(w, struct drbd_request, w);
263
264 /* NOTE: mdev->ldev can be NULL by the time we get here! */
265 /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
266
267 /* the only way this callback is scheduled is from _req_may_be_done,
268 * when it is done and had a local write error, see comments there */
269 drbd_req_free(req);
270
271 return TRUE;
272}
273
274int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 257int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
275{ 258{
276 struct drbd_request *req = container_of(w, struct drbd_request, w); 259 struct drbd_request *req = container_of(w, struct drbd_request, w);
@@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
280 * to give the disk the chance to relocate that block */ 263 * to give the disk the chance to relocate that block */
281 264
282 spin_lock_irq(&mdev->req_lock); 265 spin_lock_irq(&mdev->req_lock);
283 if (cancel || 266 if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
284 mdev->state.conn < C_CONNECTED || 267 _req_mod(req, read_retry_remote_canceled);
285 mdev->state.pdsk <= D_INCONSISTENT) {
286 _req_mod(req, send_canceled);
287 spin_unlock_irq(&mdev->req_lock); 268 spin_unlock_irq(&mdev->req_lock);
288 dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
289 return 1; 269 return 1;
290 } 270 }
291 spin_unlock_irq(&mdev->req_lock); 271 spin_unlock_irq(&mdev->req_lock);