author		Linus Torvalds <torvalds@linux-foundation.org>	2010-06-04 18:37:44 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-06-04 18:37:44 -0400
commit		d2dd328b7f7bc6cebe167648289337755944ad2a (patch)
tree		5d664a2db1ac209f7537452ddc02597972f7aa37 /drivers/block
parent		c1518f12bab97a6d409a25aaccb02dc8895800f3 (diff)
parent		1abec4fdbb142e3ccb6ce99832fae42129134a96 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits)
block: make blk_init_free_list and elevator_init idempotent
block: avoid unconditionally freeing previously allocated request_queue
pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface
pipe: change the privilege required for growing a pipe beyond system max
pipe: adjust minimum pipe size to 1 page
block: disable preemption before using sched_clock()
cciss: call BUG() earlier
Preparing 8.3.8rc2
drbd: Reduce verbosity
drbd: use drbd specific ratelimit instead of global printk_ratelimit
drbd: fix hang on local read errors while disconnected
drbd: Removed the now empty w_io_error() function
drbd: removed duplicated #includes
drbd: improve usage of MSG_MORE
drbd: need to set socket bufsize early to take effect
drbd: improve network latency, TCP_QUICKACK
drbd: Revert "drbd: Create new current UUID as late as possible"
brd: support discard
Revert "writeback: fix WB_SYNC_NONE writeback from umount"
Revert "writeback: ensure that WB_SYNC_NONE writeback with sb pinned is sync"
...
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/brd.c			53
-rw-r--r--	drivers/block/cciss_scsi.c		 2
-rw-r--r--	drivers/block/drbd/drbd_int.h		14
-rw-r--r--	drivers/block/drbd/drbd_main.c		68
-rw-r--r--	drivers/block/drbd/drbd_receiver.c	45
-rw-r--r--	drivers/block/drbd/drbd_req.c		54
-rw-r--r--	drivers/block/drbd/drbd_req.h		 1
-rw-r--r--	drivers/block/drbd/drbd_worker.c	24
8 files changed, 122 insertions, 139 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 6081e81d5738..f1bf79d9bc0a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 	return page;
 }
 
+static void brd_free_page(struct brd_device *brd, sector_t sector)
+{
+	struct page *page;
+	pgoff_t idx;
+
+	spin_lock(&brd->brd_lock);
+	idx = sector >> PAGE_SECTORS_SHIFT;
+	page = radix_tree_delete(&brd->brd_pages, idx);
+	spin_unlock(&brd->brd_lock);
+	if (page)
+		__free_page(page);
+}
+
+static void brd_zero_page(struct brd_device *brd, sector_t sector)
+{
+	struct page *page;
+
+	page = brd_lookup_page(brd, sector);
+	if (page)
+		clear_highpage(page);
+}
+
 /*
  * Free all backing store pages and radix tree. This must only be called when
  * there are no other users of the device.
@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
 	return 0;
 }
 
+static void discard_from_brd(struct brd_device *brd,
+			sector_t sector, size_t n)
+{
+	while (n >= PAGE_SIZE) {
+		/*
+		 * Don't want to actually discard pages here because
+		 * re-allocating the pages can result in writeback
+		 * deadlocks under heavy load.
+		 */
+		if (0)
+			brd_free_page(brd, sector);
+		else
+			brd_zero_page(brd, sector);
+		sector += PAGE_SIZE >> SECTOR_SHIFT;
+		n -= PAGE_SIZE;
+	}
+}
+
 /*
  * Copy n bytes from src to the brd starting at sector. Does not sleep.
  */
@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
 			get_capacity(bdev->bd_disk))
 		goto out;
 
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
+		err = 0;
+		discard_from_brd(brd, sector, bio->bi_size);
+		goto out;
+	}
+
 	rw = bio_rw(bio);
 	if (rw == READA)
 		rw = READ;
@@ -320,7 +366,7 @@ out:
 }
 
 #ifdef CONFIG_BLK_DEV_XIP
-static int brd_direct_access (struct block_device *bdev, sector_t sector,
+static int brd_direct_access(struct block_device *bdev, sector_t sector,
 			void **kaddr, unsigned long *pfn)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i)
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
+	brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
+	brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+	brd->brd_queue->limits.discard_zeroes_data = 1;
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
+
 	disk = brd->brd_disk = alloc_disk(1 << part_shift);
 	if (!disk)
 		goto out_free_queue;
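The brd discard support above deliberately zeroes whole pages (brd_zero_page) rather than freeing them, since re-allocating pages under heavy load can deadlock writeback; the `if (0)` keeps the freeing variant around for reference, and discard_zeroes_data = 1 advertises the zeroing behaviour. A minimal userspace sketch to exercise the new path — the device path and length are illustrative, BLKDISCARD comes from linux/fs.h, and this is not part of the commit:

#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	int fd = open("/dev/ram0", O_WRONLY);	/* hypothetical brd device */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* start offset and length in bytes; brd only zeroes whole pages */
	uint64_t range[2] = { 0, 1024 * 1024 };
	if (ioctl(fd, BLKDISCARD, &range) < 0)
		perror("BLKDISCARD");
	close(fd);
	return 0;
}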
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1d0e2cfec72..3381505c8a6c 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
+	stk->top++;
 	if (stk->top >= CMD_STACK_SIZE) {
 		printk("cciss: scsi_cmd_free called too many times.\n");
 		BUG();
 	}
-	stk->top++;
 	stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
 }
 
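The cciss fix ("call BUG() earlier") moves the stk->top increment ahead of the bounds check: previously a free at top == CMD_STACK_SIZE - 1 passed the check, then incremented and stored one element past the array, and only the next call would BUG(). Incrementing first lets the check trap the overflow before the out-of-bounds store. An illustrative sketch of the corrected pattern — hypothetical names, not cciss code:

#include <stdlib.h>

#define STACK_SIZE 16

struct cmd_stack {
	int top;			/* index of last used slot, -1 when empty */
	void *elem[STACK_SIZE];
};

static void cmd_push(struct cmd_stack *s, void *p)
{
	s->top++;
	if (s->top >= STACK_SIZE)
		abort();	/* trap before elem[s->top] writes out of bounds */
	s->elem[s->top] = p;
}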
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e9654c8d5b62..485ed8c7d623 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -943,8 +943,7 @@ struct drbd_conf {
 	struct drbd_work resync_work,
 			 unplug_work,
			 md_sync_work,
-			 delay_probe_work,
-			 uuid_work;
+			 delay_probe_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
 	struct timer_list delay_probe_timer;
@@ -1069,7 +1068,6 @@ struct drbd_conf {
 	struct timeval dps_time; /* delay-probes-start-time */
 	unsigned int dp_volume_last; /* send_cnt of last delay probe */
 	int c_sync_rate; /* current resync rate after delay_probe magic */
-	atomic_t new_c_uuid;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
-extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
 extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
@@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock)
 
 static inline void drbd_tcp_quickack(struct socket *sock)
 {
-	int __user val = 1;
+	int __user val = 2;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
 			(char __user *)&val, sizeof(val));
 }
@@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
 	switch (mdev->ldev->dc.on_io_error) {
 	case EP_PASS_ON:
 		if (!forcedetach) {
-			if (printk_ratelimit())
+			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s."
 					"Passing error on...\n", where);
 			break;
@@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
 		return 0;
 	if (test_bit(BITMAP_IO, &mdev->flags))
 		return 0;
-	if (atomic_read(&mdev->new_c_uuid))
-		return 0;
 	return 1;
 }
 
@@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1))
-		drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work);
-
 	spin_lock_irq(&mdev->req_lock);
 	while (!__inc_ap_bio_cond(mdev)) {
 		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
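drbd_tcp_quickack() above ("drbd: improve network latency, TCP_QUICKACK") is a thin wrapper over the TCP_QUICKACK socket option, which makes the receiver ACK immediately instead of waiting out the delayed-ACK timer, saving a round trip on request/response traffic. Quick-ack mode is not sticky — the kernel may fall back to delayed ACKs — so it is typically re-armed near each receive. A userspace sketch of the same idea using ordinary sockets rather than the in-kernel drbd_setsockopt path:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Re-arm quick-ack mode on a connected TCP socket; call it again
 * after receives if low ACK latency matters. */
static void tcp_quickack(int fd)
{
	int val = 1;
	(void) setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &val, sizeof(val));
}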
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index be2d2da9cdba..6b077f93acc6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		      ns.pdsk == D_OUTDATED)) {
 		if (get_ldev(mdev)) {
 			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE &&
-			    !atomic_read(&mdev->new_c_uuid))
-				atomic_set(&mdev->new_c_uuid, 2);
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+				drbd_uuid_new_current(mdev);
+				drbd_send_uuids(mdev);
+			}
 			put_ldev(mdev);
 		}
 	}
 
 	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		/* Diskless peer becomes primary or got connected do diskless, primary peer. */
-		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 &&
-		    !atomic_read(&mdev->new_c_uuid))
-			atomic_set(&mdev->new_c_uuid, 2);
+		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
+			drbd_uuid_new_current(mdev);
 
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	drbd_md_sync(mdev);
 }
 
-static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	if (get_ldev(mdev)) {
-		if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
-			drbd_uuid_new_current(mdev);
-			if (get_net_conf(mdev)) {
-				drbd_send_uuids(mdev);
-				put_net_conf(mdev);
-			}
-			drbd_md_sync(mdev);
-		}
-		put_ldev(mdev);
-	}
-	atomic_dec(&mdev->new_c_uuid);
-	wake_up(&mdev->misc_wait);
-
-	return 1;
-}
-
 static int drbd_thread_setup(void *arg)
 {
@@ -2291,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
  * with page_count == 0 or PageSlab.
  */
 static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
-		   int offset, size_t size)
+		   int offset, size_t size, unsigned msg_flags)
 {
-	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
 	kunmap(page);
 	if (sent == size)
 		mdev->send_cnt += size>>9;
@@ -2301,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
 }
 
 static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
-		    int offset, size_t size)
+		    int offset, size_t size, unsigned msg_flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int sent, ok;
@@ -2314,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
 	 * __page_cache_release a page that would actually still be referenced
 	 * by someone, leading to some obscure delayed Oops somewhere else. */
 	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
-		return _drbd_no_send_page(mdev, page, offset, size);
+		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
 
+	msg_flags |= MSG_NOSIGNAL;
 	drbd_update_congested(mdev);
 	set_fs(KERNEL_DS);
 	do {
 		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
 							offset, len,
-							MSG_NOSIGNAL);
+							msg_flags);
 		if (sent == -EAGAIN) {
 			if (we_should_drop_the_connection(mdev,
 							  mdev->data.socket))
@@ -2350,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	/* hint all but last page with MSG_MORE */
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		if (!_drbd_no_send_page(mdev, bvec->bv_page,
-					bvec->bv_offset, bvec->bv_len))
+					bvec->bv_offset, bvec->bv_len,
+					i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
 			return 0;
 	}
 	return 1;
@@ -2362,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	/* hint all but last page with MSG_MORE */
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		if (!_drbd_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len))
+				     bvec->bv_offset, bvec->bv_len,
+				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
 			return 0;
 	}
-
 	return 1;
 }
 
@@ -2375,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
 {
 	struct page *page = e->pages;
 	unsigned len = e->size;
+	/* hint all but last page with MSG_MORE */
 	page_chain_for_each(page) {
 		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		if (!_drbd_send_page(mdev, page, 0, l))
+		if (!_drbd_send_page(mdev, page, 0, l,
+				page_chain_next(page) ? MSG_MORE : 0))
 			return 0;
 		len -= l;
 	}
@@ -2457,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	p.dp_flags = cpu_to_be32(dp_flags);
 	set_bit(UNPLUG_REMOTE, &mdev->flags);
 	ok = (sizeof(p) ==
-		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
+		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok) {
 		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
@@ -2510,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 		return 0;
 
 	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
-					sizeof(p), MSG_MORE);
+					sizeof(p), dgs ? MSG_MORE : 0);
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok)
 		ok = _drbd_send_zc_ee(mdev, e);
@@ -2708,7 +2695,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->net_cnt, 0);
 	atomic_set(&mdev->packet_seq, 0);
 	atomic_set(&mdev->pp_in_use, 0);
-	atomic_set(&mdev->new_c_uuid, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2739,14 +2725,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 	INIT_LIST_HEAD(&mdev->delay_probes);
 	INIT_LIST_HEAD(&mdev->delay_probe_work.list);
-	INIT_LIST_HEAD(&mdev->uuid_work.list);
 
 	mdev->resync_work.cb = w_resync_inactive;
 	mdev->unplug_work.cb = w_send_write_hint;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
 	mdev->delay_probe_work.cb = w_delay_probes;
-	mdev->uuid_work.cb = w_new_current_uuid;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
 	init_timer(&mdev->delay_probe_timer);
@@ -3799,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
 	if (ret) {
 		fault_count++;
 
-		if (printk_ratelimit())
+		if (__ratelimit(&drbd_ratelimit_state))
 			dev_warn(DEV, "***Simulating %s failure\n",
 				_drbd_fault_str(type));
 	}
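The MSG_MORE changes above ("drbd: improve usage of MSG_MORE") all follow one rule: set the flag on a send whenever the caller knows more bytes of the same packet follow (every bio page but the last, a header when a digest comes next), and clear it on the final piece so the TCP stack can emit a full segment instead of dribbling small ones. A userspace sketch of the pattern with a hypothetical header/payload framing — not drbd's wire format:

#include <sys/socket.h>
#include <sys/types.h>

struct hdr { unsigned int magic, len; };	/* illustrative framing */

static ssize_t send_packet(int fd, const void *payload, unsigned int len)
{
	struct hdr h = { 0xdeadbeef, len };

	/* header: hint "more data follows" only if there is a payload */
	if (send(fd, &h, sizeof(h), len ? MSG_MORE : 0) != sizeof(h))
		return -1;
	/* last piece: no MSG_MORE, let the stack push the packet out */
	if (len && send(fd, payload, len, 0) != (ssize_t)len)
		return -1;
	return sizeof(h) + len;
}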
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index bc9ab7fb2cc7..dff48701b84d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -42,7 +42,6 @@
 #include <linux/unistd.h>
 #include <linux/vmalloc.h>
 #include <linux/random.h>
-#include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/scatterlist.h>
 #include "drbd_int.h"
@@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
 	return rv;
 }
 
+/* quoting tcp(7):
+ *   On individual connections, the socket buffer size must be set prior to the
+ *   listen(2) or connect(2) calls in order to have it take effect.
+ * This is our wrapper to do so.
+ */
+static void drbd_setbufsize(struct socket *sock, unsigned int snd,
+		unsigned int rcv)
+{
+	/* open coded SO_SNDBUF, SO_RCVBUF */
+	if (snd) {
+		sock->sk->sk_sndbuf = snd;
+		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	}
+	if (rcv) {
+		sock->sk->sk_rcvbuf = rcv;
+		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+}
+
 static struct socket *drbd_try_connect(struct drbd_conf *mdev)
 {
 	const char *what;
@@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
 
 	sock->sk->sk_rcvtimeo =
 	sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
+	drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
+			mdev->net_conf->rcvbuf_size);
 
 	/* explicitly bind to the configured IP as source IP
 	 * for the outgoing connections.
@@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
 	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
 	s_listen->sk->sk_rcvtimeo = timeo;
 	s_listen->sk->sk_sndtimeo = timeo;
+	drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
+			mdev->net_conf->rcvbuf_size);
 
 	what = "bind before listen";
 	err = s_listen->ops->bind(s_listen,
@@ -856,16 +878,6 @@ retry:
 	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
 	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
 
-	if (mdev->net_conf->sndbuf_size) {
-		sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
-		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-	}
-
-	if (mdev->net_conf->rcvbuf_size) {
-		sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
-		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-	}
-
 	/* NOT YET ...
 	 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
 	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 	unsigned n_bios = 0;
 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
 
-	if (atomic_read(&mdev->new_c_uuid)) {
-		if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) {
-			drbd_uuid_new_current(mdev);
-			drbd_md_sync(mdev);
-
-			atomic_dec(&mdev->new_c_uuid);
-			wake_up(&mdev->misc_wait);
-		}
-		wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));
-	}
-
 	/* In most cases, we will only need one bio. But in case the lower
 	 * level restrictions happen to be different at this offset on this
 	 * side than those of the sending peer, we may need to submit the
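drbd_setbufsize() exists because of the tcp(7) rule quoted in its comment: the buffer sizes feed into the window scale negotiated during the handshake, so SO_SNDBUF/SO_RCVBUF must be set before connect(2) or listen(2) — which is why the old code, which sized the buffers only after the connection was established, had no real effect ("drbd: need to set socket bufsize early to take effect"). A userspace sketch of the same ordering; the helper name and sizes are illustrative:

#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

static int connect_with_bufs(const struct sockaddr_in *peer,
			     int sndbuf, int rcvbuf)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	/* must precede connect(); set afterwards, the sizes no longer
	 * influence the negotiated window scale */
	if (sndbuf)
		setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));
	if (rcvbuf)
		setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
	if (connect(fd, (const struct sockaddr *)peer, sizeof(*peer)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}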
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3397f11d0ba9..654f1ef5cbb0 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
 		}
 	}
 
-	/* if it was a local io error, we want to notify our
-	 * peer about that, and see if we need to
-	 * detach the disk and stuff.
-	 * to avoid allocating some special work
-	 * struct, reuse the request. */
-
-	/* THINK
-	 * why do we do this not when we detect the error,
-	 * but delay it until it is "done", i.e. possibly
-	 * until the next barrier ack? */
-
-	if (rw == WRITE &&
-	    ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
-		if (!(req->w.list.next == LIST_POISON1 ||
-		      list_empty(&req->w.list))) {
-			/* DEBUG ASSERT only; if this triggers, we
-			 * probably corrupt the worker list here */
-			dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
-			dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
-		}
-		req->w.cb = w_io_error;
-		drbd_queue_work(&mdev->data.work, &req->w);
-		/* drbd_req_free() is done in w_io_error */
-	} else {
-		drbd_req_free(req);
-	}
+	drbd_req_free(req);
 }
 
 static void queue_barrier(struct drbd_conf *mdev)
@@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
-			(unsigned long long)req->sector, req->size);
-		/* and now: check how to handle local io error. */
 		__drbd_chk_io_error(mdev, FALSE);
 		_req_may_be_done(req, m);
 		put_ldev(mdev);
@@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
-			(unsigned long long)req->sector, req->size);
-		/* _req_mod(req,to_be_send); oops, recursion... */
 		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
-		req->rq_state |= RQ_NET_PENDING;
-		inc_ap_pending(mdev);
 
 		__drbd_chk_io_error(mdev, FALSE);
 		put_ldev(mdev);
-		/* NOTE: if we have no connection,
-		 * or know the peer has no good data either,
-		 * then we don't actually need to "queue_for_net_read",
-		 * but we do so anyways, since the drbd_io_error()
-		 * and the potential state change to "Diskless"
-		 * needs to be done from process context */
 
+		/* no point in retrying if there is no good remote data,
+		 * or we have no connection. */
+		if (mdev->state.pdsk != D_UP_TO_DATE) {
+			_req_may_be_done(req, m);
+			break;
+		}
+
+		/* _req_mod(req,to_be_send); oops, recursion... */
+		req->rq_state |= RQ_NET_PENDING;
+		inc_ap_pending(mdev);
 		/* fall through: _req_mod(req,queue_for_net_read); */
 
 	case queue_for_net_read:
@@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		_req_may_be_done(req, m);
 		break;
 
+	case read_retry_remote_canceled:
+		req->rq_state &= ~RQ_NET_QUEUED;
+		/* fall through, in case we raced with drbd_disconnect */
 	case connection_lost_while_pending:
 		/* transfer log cleanup after connection loss */
 		/* assert something? */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 16119d7056cc..02d575d24518 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -91,6 +91,7 @@ enum drbd_req_event {
 	send_failed,
 	handed_over_to_network,
 	connection_lost_while_pending,
+	read_retry_remote_canceled,
 	recv_acked_by_peer,
 	write_acked_by_peer,
 	write_acked_by_peer_and_sis, /* and set_in_sync */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 727ff6339754..b623ceee2a4a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error)
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
-	if (error)
-		dev_warn(DEV, "p %s: error=%d\n",
-			 bio_data_dir(bio) == WRITE ? "write" : "read", error);
 	if (!error && !uptodate) {
 		dev_warn(DEV, "p %s: setting error to -EIO\n",
 			 bio_data_dir(bio) == WRITE ? "write" : "read");
@@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error)
 	complete_master_bio(mdev, &m);
 }
 
-int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	struct drbd_request *req = container_of(w, struct drbd_request, w);
-
-	/* NOTE: mdev->ldev can be NULL by the time we get here! */
-	/* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
-
-	/* the only way this callback is scheduled is from _req_may_be_done,
-	 * when it is done and had a local write error, see comments there */
-	drbd_req_free(req);
-
-	return TRUE;
-}
-
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
@@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	 * to give the disk the chance to relocate that block */
 
 	spin_lock_irq(&mdev->req_lock);
-	if (cancel ||
-	    mdev->state.conn < C_CONNECTED ||
-	    mdev->state.pdsk <= D_INCONSISTENT) {
-		_req_mod(req, send_canceled);
+	if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
+		_req_mod(req, read_retry_remote_canceled);
 		spin_unlock_irq(&mdev->req_lock);
-		dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
 		return 1;
 	}
 	spin_unlock_irq(&mdev->req_lock);