author		Paul Mundt <lethal@linux-sh.org>	2010-07-05 02:46:08 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2010-07-05 02:46:08 -0400
commit		285eba57db7bd7d7c3c5929fb8621fdcaaea1b00 (patch)
tree		a9e7f0563cef296b24c53b20dbb388ec5c210172 /drivers/block
parent		1c14e6cecb1811543b1016f27e5d308fbea8c08a (diff)
parent		815c4163b6c8ebf8152f42b0a5fd015cfdcedc78 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
	include/linux/serial_sci.h

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/brd.c			53
-rw-r--r--	drivers/block/cciss_scsi.c		 3
-rw-r--r--	drivers/block/cpqarray.c		 6
-rw-r--r--	drivers/block/drbd/drbd_int.h		14
-rw-r--r--	drivers/block/drbd/drbd_main.c		70
-rw-r--r--	drivers/block/drbd/drbd_nl.c		 6
-rw-r--r--	drivers/block/drbd/drbd_receiver.c	45
-rw-r--r--	drivers/block/drbd/drbd_req.c		54
-rw-r--r--	drivers/block/drbd/drbd_req.h		 1
-rw-r--r--	drivers/block/drbd/drbd_worker.c	24
-rw-r--r--	drivers/block/swim3.c			 6
-rw-r--r--	drivers/block/virtio_blk.c		 4
12 files changed, 139 insertions(+), 147 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 6081e81d5738..f1bf79d9bc0a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 	return page;
 }
 
+static void brd_free_page(struct brd_device *brd, sector_t sector)
+{
+	struct page *page;
+	pgoff_t idx;
+
+	spin_lock(&brd->brd_lock);
+	idx = sector >> PAGE_SECTORS_SHIFT;
+	page = radix_tree_delete(&brd->brd_pages, idx);
+	spin_unlock(&brd->brd_lock);
+	if (page)
+		__free_page(page);
+}
+
+static void brd_zero_page(struct brd_device *brd, sector_t sector)
+{
+	struct page *page;
+
+	page = brd_lookup_page(brd, sector);
+	if (page)
+		clear_highpage(page);
+}
+
 /*
  * Free all backing store pages and radix tree. This must only be called when
  * there are no other users of the device.
@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
 	return 0;
 }
 
+static void discard_from_brd(struct brd_device *brd,
+			sector_t sector, size_t n)
+{
+	while (n >= PAGE_SIZE) {
+		/*
+		 * Don't want to actually discard pages here because
+		 * re-allocating the pages can result in writeback
+		 * deadlocks under heavy load.
+		 */
+		if (0)
+			brd_free_page(brd, sector);
+		else
+			brd_zero_page(brd, sector);
+		sector += PAGE_SIZE >> SECTOR_SHIFT;
+		n -= PAGE_SIZE;
+	}
+}
+
 /*
  * Copy n bytes from src to the brd starting at sector. Does not sleep.
  */
@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
 			get_capacity(bdev->bd_disk))
 		goto out;
 
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
+		err = 0;
+		discard_from_brd(brd, sector, bio->bi_size);
+		goto out;
+	}
+
 	rw = bio_rw(bio);
 	if (rw == READA)
 		rw = READ;
@@ -320,7 +366,7 @@ out:
 }
 
 #ifdef CONFIG_BLK_DEV_XIP
-static int brd_direct_access (struct block_device *bdev, sector_t sector,
+static int brd_direct_access(struct block_device *bdev, sector_t sector,
 			void **kaddr, unsigned long *pfn)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i)
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
+	brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
+	brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+	brd->brd_queue->limits.discard_zeroes_data = 1;
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
+
 	disk = brd->brd_disk = alloc_disk(1 << part_shift);
 	if (!disk)
 		goto out_free_queue;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1d0e2cfec72..72dae92f3cab 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
+	stk->top++;
 	if (stk->top >= CMD_STACK_SIZE) {
 		printk("cciss: scsi_cmd_free called too many times.\n");
 		BUG();
 	}
-	stk->top++;
 	stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
 }
 
@@ -861,6 +861,7 @@ cciss_scsi_detect(int ctlr)
 	sh->n_io_port = 0;	// I don't think we use these two...
 	sh->this_id = SELF_SCSI_ID;
 	sh->sg_tablesize = hba[ctlr]->maxsgentries;
+	sh->max_cmd_len = MAX_COMMAND_SIZE;
 
 	((struct cciss_scsi_adapter_data_t *)
 		hba[ctlr]->scsi_ctlr)->scsi_host = sh;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 91d11631cec9..abb4ec6690fc 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -386,7 +386,7 @@ static void __devexit cpqarray_remove_one_eisa (int i)
 }
 
 /* pdev is NULL for eisa */
-static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
+static int __devinit cpqarray_register_ctlr( int i, struct pci_dev *pdev)
 {
 	struct request_queue *q;
 	int j;
@@ -503,7 +503,7 @@ Enomem4:
 	return -1;
 }
 
-static int __init cpqarray_init_one( struct pci_dev *pdev,
+static int __devinit cpqarray_init_one( struct pci_dev *pdev,
 	const struct pci_device_id *ent)
 {
 	int i;
@@ -740,7 +740,7 @@ __setup("smart2=", cpqarray_setup);
 /*
  * Find an EISA controller's signature.  Set up an hba if we find it.
  */
-static int __init cpqarray_eisa_detect(void)
+static int __devinit cpqarray_eisa_detect(void)
 {
 	int i=0, j;
 	__u32 board_id;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e9654c8d5b62..485ed8c7d623 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -943,8 +943,7 @@ struct drbd_conf {
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  md_sync_work,
-			  delay_probe_work,
-			  uuid_work;
+			  delay_probe_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
 	struct timer_list delay_probe_timer;
@@ -1069,7 +1068,6 @@ struct drbd_conf {
 	struct timeval dps_time; /* delay-probes-start-time */
 	unsigned int dp_volume_last; /* send_cnt of last delay probe */
 	int c_sync_rate; /* current resync rate after delay_probe magic */
-	atomic_t new_c_uuid;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
-extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
 extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
@@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock)
 
 static inline void drbd_tcp_quickack(struct socket *sock)
 {
-	int __user val = 1;
+	int __user val = 2;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
 			(char __user *)&val, sizeof(val));
 }
@@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
 	switch (mdev->ldev->dc.on_io_error) {
 	case EP_PASS_ON:
 		if (!forcedetach) {
-			if (printk_ratelimit())
+			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s."
 					     "Passing error on...\n", where);
 			break;
@@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
 		return 0;
 	if (test_bit(BITMAP_IO, &mdev->flags))
 		return 0;
-	if (atomic_read(&mdev->new_c_uuid))
-		return 0;
 	return 1;
 }
 
@@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1))
-		drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work);
-
 	spin_lock_irq(&mdev->req_lock);
 	while (!__inc_ap_bio_cond(mdev)) {
 		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index be2d2da9cdba..7258c95e895e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 			    ns.pdsk == D_OUTDATED)) {
 		if (get_ldev(mdev)) {
 			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE &&
-			    !atomic_read(&mdev->new_c_uuid))
-				atomic_set(&mdev->new_c_uuid, 2);
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+				drbd_uuid_new_current(mdev);
+				drbd_send_uuids(mdev);
+			}
 			put_ldev(mdev);
 		}
 	}
 
 	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		/* Diskless peer becomes primary or got connected do diskless, primary peer. */
-		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 &&
-		    !atomic_read(&mdev->new_c_uuid))
-			atomic_set(&mdev->new_c_uuid, 2);
+		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
+			drbd_uuid_new_current(mdev);
 
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1237,8 +1236,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* Last part of the attaching process ... */
 	if (ns.conn >= C_CONNECTED &&
 	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
-		kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
-		mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
 		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
 		drbd_send_uuids(mdev);
 		drbd_send_state(mdev);
@@ -1350,24 +1347,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	drbd_md_sync(mdev);
 }
 
-static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	if (get_ldev(mdev)) {
-		if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
-			drbd_uuid_new_current(mdev);
-			if (get_net_conf(mdev)) {
-				drbd_send_uuids(mdev);
-				put_net_conf(mdev);
-			}
-			drbd_md_sync(mdev);
-		}
-		put_ldev(mdev);
-	}
-	atomic_dec(&mdev->new_c_uuid);
-	wake_up(&mdev->misc_wait);
-
-	return 1;
-}
 
 static int drbd_thread_setup(void *arg)
 {
@@ -2291,9 +2270,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
  * with page_count == 0 or PageSlab.
  */
 static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
-		   int offset, size_t size)
+		   int offset, size_t size, unsigned msg_flags)
 {
-	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
 	kunmap(page);
 	if (sent == size)
 		mdev->send_cnt += size>>9;
@@ -2301,7 +2280,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
 }
 
 static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
-		    int offset, size_t size)
+		    int offset, size_t size, unsigned msg_flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int sent, ok;
@@ -2314,14 +2293,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
 	 * __page_cache_release a page that would actually still be referenced
 	 * by someone, leading to some obscure delayed Oops somewhere else. */
 	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
-		return _drbd_no_send_page(mdev, page, offset, size);
+		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
 
+	msg_flags |= MSG_NOSIGNAL;
 	drbd_update_congested(mdev);
 	set_fs(KERNEL_DS);
 	do {
 		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
 							offset, len,
-							MSG_NOSIGNAL);
+							msg_flags);
 		if (sent == -EAGAIN) {
 			if (we_should_drop_the_connection(mdev,
 							  mdev->data.socket))
@@ -2350,9 +2330,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	/* hint all but last page with MSG_MORE */
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		if (!_drbd_no_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len))
+				     bvec->bv_offset, bvec->bv_len,
+				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
 			return 0;
 	}
 	return 1;
@@ -2362,12 +2344,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	/* hint all but last page with MSG_MORE */
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		if (!_drbd_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len))
+				     bvec->bv_offset, bvec->bv_len,
+				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
 			return 0;
 	}
-
 	return 1;
 }
 
@@ -2375,9 +2358,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
 {
 	struct page *page = e->pages;
 	unsigned len = e->size;
+	/* hint all but last page with MSG_MORE */
 	page_chain_for_each(page) {
 		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		if (!_drbd_send_page(mdev, page, 0, l))
+		if (!_drbd_send_page(mdev, page, 0, l,
+				      page_chain_next(page) ? MSG_MORE : 0))
 			return 0;
 		len -= l;
 	}
@@ -2457,11 +2442,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	p.dp_flags = cpu_to_be32(dp_flags);
 	set_bit(UNPLUG_REMOTE, &mdev->flags);
 	ok = (sizeof(p) ==
-		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
+		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok) {
 		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
@@ -2510,11 +2495,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 		return 0;
 
 	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
-					sizeof(p), MSG_MORE);
+					sizeof(p), dgs ? MSG_MORE : 0);
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok)
 		ok = _drbd_send_zc_ee(mdev, e);
@@ -2708,7 +2693,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->net_cnt, 0);
 	atomic_set(&mdev->packet_seq, 0);
 	atomic_set(&mdev->pp_in_use, 0);
-	atomic_set(&mdev->new_c_uuid, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2739,14 +2723,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 	INIT_LIST_HEAD(&mdev->delay_probes);
 	INIT_LIST_HEAD(&mdev->delay_probe_work.list);
-	INIT_LIST_HEAD(&mdev->uuid_work.list);
 
 	mdev->resync_work.cb  = w_resync_inactive;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
 	mdev->delay_probe_work.cb = w_delay_probes;
-	mdev->uuid_work.cb = w_new_current_uuid;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
 	init_timer(&mdev->delay_probe_timer);
@@ -3799,7 +3781,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
 	if (ret) {
 		fault_count++;
 
-		if (printk_ratelimit())
+		if (__ratelimit(&drbd_ratelimit_state))
 			dev_warn(DEV, "***Simulating %s failure\n",
 				_drbd_fault_str(type));
 	}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 632e3245d1bb..2151f18b21de 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1114,6 +1114,12 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		mdev->new_state_tmp.i = ns.i;
 		ns.i = os.i;
 		ns.disk = D_NEGOTIATING;
+
+		/* We expect to receive up-to-date UUIDs soon.
+		   To avoid a race in receive_state, free p_uuid while
+		   holding req_lock. I.e. atomic with the state change */
+		kfree(mdev->p_uuid);
+		mdev->p_uuid = NULL;
 	}
 
 	rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index bc9ab7fb2cc7..dff48701b84d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -42,7 +42,6 @@
 #include <linux/unistd.h>
 #include <linux/vmalloc.h>
 #include <linux/random.h>
-#include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/scatterlist.h>
 #include "drbd_int.h"
@@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
 	return rv;
 }
 
+/* quoting tcp(7):
+ *   On individual connections, the socket buffer size must be set prior to the
+ *   listen(2) or connect(2) calls in order to have it take effect.
+ * This is our wrapper to do so.
+ */
+static void drbd_setbufsize(struct socket *sock, unsigned int snd,
+			    unsigned int rcv)
+{
+	/* open coded SO_SNDBUF, SO_RCVBUF */
+	if (snd) {
+		sock->sk->sk_sndbuf = snd;
+		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	}
+	if (rcv) {
+		sock->sk->sk_rcvbuf = rcv;
+		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+}
+
 static struct socket *drbd_try_connect(struct drbd_conf *mdev)
 {
 	const char *what;
@@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
 
 	sock->sk->sk_rcvtimeo =
 	sock->sk->sk_sndtimeo =  mdev->net_conf->try_connect_int*HZ;
+	drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
+			mdev->net_conf->rcvbuf_size);
 
 	/* explicitly bind to the configured IP as source IP
 	 * for the outgoing connections.
@@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
 	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
 	s_listen->sk->sk_rcvtimeo = timeo;
 	s_listen->sk->sk_sndtimeo = timeo;
+	drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
+			mdev->net_conf->rcvbuf_size);
 
 	what = "bind before listen";
 	err = s_listen->ops->bind(s_listen,
@@ -856,16 +878,6 @@ retry:
 	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
 	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
 
-	if (mdev->net_conf->sndbuf_size) {
-		sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
-		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-	}
-
-	if (mdev->net_conf->rcvbuf_size) {
-		sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
-		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-	}
-
 	/* NOT YET ...
 	 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
 	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 	unsigned n_bios = 0;
 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
 
-	if (atomic_read(&mdev->new_c_uuid)) {
-		if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) {
-			drbd_uuid_new_current(mdev);
-			drbd_md_sync(mdev);
-
-			atomic_dec(&mdev->new_c_uuid);
-			wake_up(&mdev->misc_wait);
-		}
-		wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));
-	}
-
 	/* In most cases, we will only need one bio.  But in case the lower
 	 * level restrictions happen to be different at this offset on this
 	 * side than those of the sending peer, we may need to submit the
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3397f11d0ba9..654f1ef5cbb0 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
 		}
 	}
 
-	/* if it was a local io error, we want to notify our
-	 * peer about that, and see if we need to
-	 * detach the disk and stuff.
-	 * to avoid allocating some special work
-	 * struct, reuse the request. */
-
-	/* THINK
-	 * why do we do this not when we detect the error,
-	 * but delay it until it is "done", i.e. possibly
-	 * until the next barrier ack? */
-
-	if (rw == WRITE &&
-	    ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
-		if (!(req->w.list.next == LIST_POISON1 ||
-		      list_empty(&req->w.list))) {
-			/* DEBUG ASSERT only; if this triggers, we
-			 * probably corrupt the worker list here */
-			dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
-			dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
-		}
-		req->w.cb = w_io_error;
-		drbd_queue_work(&mdev->data.work, &req->w);
-		/* drbd_req_free() is done in w_io_error */
-	} else {
-		drbd_req_free(req);
-	}
+	drbd_req_free(req);
 }
 
 static void queue_barrier(struct drbd_conf *mdev)
@@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
-			(unsigned long long)req->sector, req->size);
-		/* and now: check how to handle local io error. */
 		__drbd_chk_io_error(mdev, FALSE);
 		_req_may_be_done(req, m);
 		put_ldev(mdev);
@@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
-			(unsigned long long)req->sector, req->size);
-		/* _req_mod(req,to_be_send); oops, recursion... */
 		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
-		req->rq_state |= RQ_NET_PENDING;
-		inc_ap_pending(mdev);
 
 		__drbd_chk_io_error(mdev, FALSE);
 		put_ldev(mdev);
-		/* NOTE: if we have no connection,
-		 * or know the peer has no good data either,
-		 * then we don't actually need to "queue_for_net_read",
-		 * but we do so anyways, since the drbd_io_error()
-		 * and the potential state change to "Diskless"
-		 * needs to be done from process context */
 
+		/* no point in retrying if there is no good remote data,
+		 * or we have no connection. */
+		if (mdev->state.pdsk != D_UP_TO_DATE) {
+			_req_may_be_done(req, m);
+			break;
+		}
+
+		/* _req_mod(req,to_be_send); oops, recursion... */
+		req->rq_state |= RQ_NET_PENDING;
+		inc_ap_pending(mdev);
 		/* fall through: _req_mod(req,queue_for_net_read); */
 
 	case queue_for_net_read:
@@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		_req_may_be_done(req, m);
 		break;
 
+	case read_retry_remote_canceled:
+		req->rq_state &= ~RQ_NET_QUEUED;
+		/* fall through, in case we raced with drbd_disconnect */
 	case connection_lost_while_pending:
 		/* transfer log cleanup after connection loss */
 		/* assert something? */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 16119d7056cc..02d575d24518 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -91,6 +91,7 @@ enum drbd_req_event {
 	send_failed,
 	handed_over_to_network,
 	connection_lost_while_pending,
+	read_retry_remote_canceled,
 	recv_acked_by_peer,
 	write_acked_by_peer,
 	write_acked_by_peer_and_sis, /* and set_in_sync */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 727ff6339754..b623ceee2a4a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error)
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
-	if (error)
-		dev_warn(DEV, "p %s: error=%d\n",
-			 bio_data_dir(bio) == WRITE ? "write" : "read", error);
 	if (!error && !uptodate) {
 		dev_warn(DEV, "p %s: setting error to -EIO\n",
 			 bio_data_dir(bio) == WRITE ? "write" : "read");
@@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error)
 	complete_master_bio(mdev, &m);
 }
 
-int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	struct drbd_request *req = container_of(w, struct drbd_request, w);
-
-	/* NOTE: mdev->ldev can be NULL by the time we get here! */
-	/* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
-
-	/* the only way this callback is scheduled is from _req_may_be_done,
-	 * when it is done and had a local write error, see comments there */
-	drbd_req_free(req);
-
-	return TRUE;
-}
-
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
@@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	 * to give the disk the chance to relocate that block */
 
 	spin_lock_irq(&mdev->req_lock);
-	if (cancel ||
-	    mdev->state.conn < C_CONNECTED ||
-	    mdev->state.pdsk <= D_INCONSISTENT) {
-		_req_mod(req, send_canceled);
+	if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
+		_req_mod(req, read_retry_remote_canceled);
 		spin_unlock_irq(&mdev->req_lock);
-		dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
 		return 1;
 	}
 	spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 52f2d11bc7b9..ed6fb91123ab 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1159,8 +1159,10 @@ static struct of_device_id swim3_match[] =
 
 static struct macio_driver swim3_driver =
 {
-	.name		= "swim3",
-	.match_table	= swim3_match,
+	.driver = {
+		.name		= "swim3",
+		.of_match_table	= swim3_match,
+	},
 	.probe		= swim3_attach,
 #if 0
 	.suspend	= swim3_suspend,
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 83fa09a836ca..258bc2ae2885 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -298,7 +298,9 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
 				offsetof(struct virtio_blk_config, seg_max),
 				&sg_elems);
-	if (err)
+
+	/* We need at least one SG element, whatever they say. */
+	if (err || !sg_elems)
 		sg_elems = 1;
 
 	/* We need an extra sg elements at head and tail. */