aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2011-01-24 08:47:09 -0500
committer: Philipp Reisner <philipp.reisner@linbit.com> 2011-03-10 05:48:10 -0500
commit: 10f6d9926cd17afff9dc03c967706419798b4929 (patch)
tree: 7b1cd4bfcffbb3832c756ce29861411f7300aadd /drivers/block
parent: 039312b6481e2928f3be19fee94c83327d93e4c7 (diff)
drbd: don't BUG_ON, if bio_add_page of a single page to an empty bio fails
Just deal with it more gracefully if we fail to add even a single page to an empty bio. We used to BUG_ON() there, but this failure has been observed in some Xen deployments, so we need to handle that case more robustly now. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 45
-rw-r--r-- drivers/block/drbd/drbd_worker.c 7
2 files changed, 34 insertions, 18 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index a56b107e01eb..9e9fc3413137 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1073,6 +1073,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
1073 * @mdev: DRBD device. 1073 * @mdev: DRBD device.
1074 * @e: epoch entry 1074 * @e: epoch entry
1075 * @rw: flag field, see bio->bi_rw 1075 * @rw: flag field, see bio->bi_rw
1076 *
1077 * May spread the pages to multiple bios,
1078 * depending on bio_add_page restrictions.
1079 *
1080 * Returns 0 if all bios have been submitted,
1081 * -ENOMEM if we could not allocate enough bios,
1082 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1083 * single page to an empty bio (which should never happen and likely indicates
1084 * that the lower level IO stack is in some way broken). This has been observed
1085 * on certain Xen deployments.
1076 */ 1086 */
1077/* TODO allocate from our own bio_set. */ 1087/* TODO allocate from our own bio_set. */
1078int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1088int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
@@ -1085,6 +1095,7 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1085 unsigned ds = e->size; 1095 unsigned ds = e->size;
1086 unsigned n_bios = 0; 1096 unsigned n_bios = 0;
1087 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; 1097 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
1098 int err = -ENOMEM;
1088 1099
1089 /* In most cases, we will only need one bio. But in case the lower 1100 /* In most cases, we will only need one bio. But in case the lower
1090 * level restrictions happen to be different at this offset on this 1101 * level restrictions happen to be different at this offset on this
@@ -1110,8 +1121,17 @@ next_bio:
1110 page_chain_for_each(page) { 1121 page_chain_for_each(page) {
1111 unsigned len = min_t(unsigned, ds, PAGE_SIZE); 1122 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1112 if (!bio_add_page(bio, page, len, 0)) { 1123 if (!bio_add_page(bio, page, len, 0)) {
1113 /* a single page must always be possible! */ 1124 /* A single page must always be possible!
1114 BUG_ON(bio->bi_vcnt == 0); 1125 * But in case it fails anyways,
1126 * we deal with it, and complain (below). */
1127 if (bio->bi_vcnt == 0) {
1128 dev_err(DEV,
1129 "bio_add_page failed for len=%u, "
1130 "bi_vcnt=0 (bi_sector=%llu)\n",
1131 len, (unsigned long long)bio->bi_sector);
1132 err = -ENOSPC;
1133 goto fail;
1134 }
1115 goto next_bio; 1135 goto next_bio;
1116 } 1136 }
1117 ds -= len; 1137 ds -= len;
@@ -1137,7 +1157,7 @@ fail:
1137 bios = bios->bi_next; 1157 bios = bios->bi_next;
1138 bio_put(bio); 1158 bio_put(bio);
1139 } 1159 }
1140 return -ENOMEM; 1160 return err;
1141} 1161}
1142 1162
1143static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1163static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -1436,9 +1456,8 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
1436 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) 1456 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
1437 return true; 1457 return true;
1438 1458
1439 /* drbd_submit_ee currently fails for one reason only: 1459 /* don't care for the reason here */
1440 * not being able to allocate enough bios. 1460 dev_err(DEV, "submit failed, triggering re-connect\n");
1441 * Is dropping the connection going to help? */
1442 spin_lock_irq(&mdev->req_lock); 1461 spin_lock_irq(&mdev->req_lock);
1443 list_del(&e->w.list); 1462 list_del(&e->w.list);
1444 spin_unlock_irq(&mdev->req_lock); 1463 spin_unlock_irq(&mdev->req_lock);
@@ -1837,9 +1856,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1837 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) 1856 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
1838 return true; 1857 return true;
1839 1858
1840 /* drbd_submit_ee currently fails for one reason only: 1859 /* don't care for the reason here */
1841 * not being able to allocate enough bios. 1860 dev_err(DEV, "submit failed, triggering re-connect\n");
1842 * Is dropping the connection going to help? */
1843 spin_lock_irq(&mdev->req_lock); 1861 spin_lock_irq(&mdev->req_lock);
1844 list_del(&e->w.list); 1862 list_del(&e->w.list);
1845 hlist_del_init(&e->colision); 1863 hlist_del_init(&e->colision);
@@ -1848,9 +1866,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1848 drbd_al_complete_io(mdev, e->sector); 1866 drbd_al_complete_io(mdev, e->sector);
1849 1867
1850out_interrupted: 1868out_interrupted:
1851 /* yes, the epoch_size now is imbalanced. 1869 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP);
1852 * but we drop the connection anyways, so we don't have a chance to
1853 * receive a barrier... atomic_inc(&mdev->epoch_size); */
1854 put_ldev(mdev); 1870 put_ldev(mdev);
1855 drbd_free_ee(mdev, e); 1871 drbd_free_ee(mdev, e);
1856 return false; 1872 return false;
@@ -2096,9 +2112,8 @@ submit:
2096 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) 2112 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
2097 return true; 2113 return true;
2098 2114
2099 /* drbd_submit_ee currently fails for one reason only: 2115 /* don't care for the reason here */
2100 * not being able to allocate enough bios. 2116 dev_err(DEV, "submit failed, triggering re-connect\n");
2101 * Is dropping the connection going to help? */
2102 spin_lock_irq(&mdev->req_lock); 2117 spin_lock_irq(&mdev->req_lock);
2103 list_del(&e->w.list); 2118 list_del(&e->w.list);
2104 spin_unlock_irq(&mdev->req_lock); 2119 spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index ff0eb308ee4a..cfd324b9f95b 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -369,9 +369,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
369 if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) 369 if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
370 return 0; 370 return 0;
371 371
372 /* drbd_submit_ee currently fails for one reason only: 372 /* If it failed because of ENOMEM, retry should help. If it failed
373 * not being able to allocate enough bios. 373 * because bio_add_page failed (probably broken lower level driver),
374 * Is dropping the connection going to help? */ 374 * retry may or may not help.
375 * If it does not, you may need to force disconnect. */
375 spin_lock_irq(&mdev->req_lock); 376 spin_lock_irq(&mdev->req_lock);
376 list_del(&e->w.list); 377 list_del(&e->w.list);
377 spin_unlock_irq(&mdev->req_lock); 378 spin_unlock_irq(&mdev->req_lock);