diff options
author | Andreas Gruenbacher <agruen@linbit.com> | 2011-01-20 09:00:24 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-08-25 08:58:06 -0400 |
commit | de696716e8c40475d259fb49b3876ca0d9415970 (patch) | |
tree | 544c5d4f166f30a4aa3a6abde0da8a3cc092b945 /drivers/block | |
parent | ace652acf2d7e564dac48c615d9184e7ed575f9c (diff) |
drbd: Use interval tree for overlapping write request detection
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 3 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 38 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 56 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 1 |
5 files changed, 52 insertions, 47 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d7678e85031b..058371318da4 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1019,6 +1019,9 @@ struct drbd_conf { | |||
1019 | struct hlist_head *tl_hash; | 1019 | struct hlist_head *tl_hash; |
1020 | unsigned int tl_hash_s; | 1020 | unsigned int tl_hash_s; |
1021 | 1021 | ||
1022 | /* Interval tree of pending local write requests */ | ||
1023 | struct rb_root write_requests; | ||
1024 | |||
1022 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ | 1025 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ |
1023 | unsigned long rs_total; | 1026 | unsigned long rs_total; |
1024 | /* number of resync blocks that failed in this run */ | 1027 | /* number of resync blocks that failed in this run */ |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a77b4bfd452a..4d85838f53e3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3473,6 +3473,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) | |||
3473 | /* no need to lock access, we are still initializing this minor device. */ | 3473 | /* no need to lock access, we are still initializing this minor device. */ |
3474 | if (!tl_init(mdev)) | 3474 | if (!tl_init(mdev)) |
3475 | goto out_no_tl; | 3475 | goto out_no_tl; |
3476 | mdev->write_requests = RB_ROOT; | ||
3476 | 3477 | ||
3477 | mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); | 3478 | mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); |
3478 | if (!mdev->app_reads_hash) | 3479 | if (!mdev->app_reads_hash) |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6bb1a2f2a38d..6b0725842508 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1733,9 +1733,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1733 | const int size = e->size; | 1733 | const int size = e->size; |
1734 | const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); | 1734 | const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); |
1735 | DEFINE_WAIT(wait); | 1735 | DEFINE_WAIT(wait); |
1736 | struct drbd_request *i; | ||
1737 | struct hlist_node *n; | ||
1738 | struct hlist_head *slot; | ||
1739 | int first; | 1736 | int first; |
1740 | 1737 | ||
1741 | D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); | 1738 | D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); |
@@ -1783,30 +1780,31 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1783 | 1780 | ||
1784 | hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); | 1781 | hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); |
1785 | 1782 | ||
1786 | #define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) | ||
1787 | slot = tl_hash_slot(mdev, sector); | ||
1788 | first = 1; | 1783 | first = 1; |
1789 | for (;;) { | 1784 | for (;;) { |
1785 | struct drbd_interval *i; | ||
1790 | int have_unacked = 0; | 1786 | int have_unacked = 0; |
1791 | int have_conflict = 0; | 1787 | int have_conflict = 0; |
1792 | prepare_to_wait(&mdev->misc_wait, &wait, | 1788 | prepare_to_wait(&mdev->misc_wait, &wait, |
1793 | TASK_INTERRUPTIBLE); | 1789 | TASK_INTERRUPTIBLE); |
1794 | hlist_for_each_entry(i, n, slot, collision) { | 1790 | |
1795 | if (OVERLAPS) { | 1791 | i = drbd_find_overlap(&mdev->write_requests, sector, size); |
1796 | /* only ALERT on first iteration, | 1792 | if (i) { |
1797 | * we may be woken up early... */ | 1793 | struct drbd_request *req2 = |
1798 | if (first) | 1794 | container_of(i, struct drbd_request, i); |
1799 | dev_alert(DEV, "%s[%u] Concurrent local write detected!" | 1795 | |
1800 | " new: %llus +%u; pending: %llus +%u\n", | 1796 | /* only ALERT on first iteration, |
1801 | current->comm, current->pid, | 1797 | * we may be woken up early... */ |
1802 | (unsigned long long)sector, size, | 1798 | if (first) |
1803 | (unsigned long long)i->i.sector, i->i.size); | 1799 | dev_alert(DEV, "%s[%u] Concurrent local write detected!" |
1804 | if (i->rq_state & RQ_NET_PENDING) | 1800 | " new: %llus +%u; pending: %llus +%u\n", |
1805 | ++have_unacked; | 1801 | current->comm, current->pid, |
1806 | ++have_conflict; | 1802 | (unsigned long long)sector, size, |
1807 | } | 1803 | (unsigned long long)req2->i.sector, req2->i.size); |
1804 | if (req2->rq_state & RQ_NET_PENDING) | ||
1805 | ++have_unacked; | ||
1806 | ++have_conflict; | ||
1808 | } | 1807 | } |
1809 | #undef OVERLAPS | ||
1810 | if (!have_conflict) | 1808 | if (!have_conflict) |
1811 | break; | 1809 | break; |
1812 | 1810 | ||
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1af11a198b58..593576fcf64e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -135,7 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
135 | struct drbd_request *req) | 135 | struct drbd_request *req) |
136 | { | 136 | { |
137 | const unsigned long s = req->rq_state; | 137 | const unsigned long s = req->rq_state; |
138 | struct drbd_request *i; | ||
139 | struct drbd_epoch_entry *e; | 138 | struct drbd_epoch_entry *e; |
140 | struct hlist_node *n; | 139 | struct hlist_node *n; |
141 | struct hlist_head *slot; | 140 | struct hlist_head *slot; |
@@ -157,19 +156,21 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
157 | if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { | 156 | if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { |
158 | const sector_t sector = req->i.sector; | 157 | const sector_t sector = req->i.sector; |
159 | const int size = req->i.size; | 158 | const int size = req->i.size; |
159 | struct drbd_interval *i; | ||
160 | 160 | ||
161 | /* ASSERT: | 161 | /* ASSERT: |
162 | * there must be no conflicting requests, since | 162 | * there must be no conflicting requests, since |
163 | * they must have been failed on the spot */ | 163 | * they must have been failed on the spot */ |
164 | #define OVERLAPS overlaps(sector, size, i->i.sector, i->i.size) | 164 | |
165 | slot = tl_hash_slot(mdev, sector); | 165 | i = drbd_find_overlap(&mdev->write_requests, sector, size); |
166 | hlist_for_each_entry(i, n, slot, collision) { | 166 | if (i) { |
167 | if (OVERLAPS) { | 167 | struct drbd_request *req2 = |
168 | dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " | 168 | container_of(i, struct drbd_request, i); |
169 | "other: %p %llus +%u\n", | 169 | |
170 | req, (unsigned long long)sector, size, | 170 | dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " |
171 | i, (unsigned long long)i->i.sector, i->i.size); | 171 | "other: %p %llus +%u\n", |
172 | } | 172 | req, (unsigned long long)sector, size, |
173 | i, (unsigned long long)req2->i.sector, req2->i.size); | ||
173 | } | 174 | } |
174 | 175 | ||
175 | /* maybe "wake" those conflicting epoch entries | 176 | /* maybe "wake" those conflicting epoch entries |
@@ -184,7 +185,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
184 | * | 185 | * |
185 | * anyways, if we found one, | 186 | * anyways, if we found one, |
186 | * we just have to do a wake_up. */ | 187 | * we just have to do a wake_up. */ |
187 | #undef OVERLAPS | ||
188 | #define OVERLAPS overlaps(sector, size, e->sector, e->size) | 188 | #define OVERLAPS overlaps(sector, size, e->sector, e->size) |
189 | slot = ee_hash_slot(mdev, req->i.sector); | 189 | slot = ee_hash_slot(mdev, req->i.sector); |
190 | hlist_for_each_entry(e, n, slot, collision) { | 190 | hlist_for_each_entry(e, n, slot, collision) { |
@@ -260,9 +260,11 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) | |||
260 | 260 | ||
261 | /* remove the request from the conflict detection | 261 | /* remove the request from the conflict detection |
262 | * respective block_id verification hash */ | 262 | * respective block_id verification hash */ |
263 | if (!hlist_unhashed(&req->collision)) | 263 | if (!hlist_unhashed(&req->collision)) { |
264 | hlist_del(&req->collision); | 264 | hlist_del(&req->collision); |
265 | else | 265 | if (!drbd_interval_empty(&req->i)) |
266 | drbd_remove_interval(&mdev->write_requests, &req->i); | ||
267 | } else | ||
266 | D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); | 268 | D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); |
267 | 269 | ||
268 | /* for writes we need to do some extra housekeeping */ | 270 | /* for writes we need to do some extra housekeeping */ |
@@ -324,7 +326,7 @@ static int _req_conflicts(struct drbd_request *req) | |||
324 | struct drbd_conf *mdev = req->mdev; | 326 | struct drbd_conf *mdev = req->mdev; |
325 | const sector_t sector = req->i.sector; | 327 | const sector_t sector = req->i.sector; |
326 | const int size = req->i.size; | 328 | const int size = req->i.size; |
327 | struct drbd_request *i; | 329 | struct drbd_interval *i; |
328 | struct drbd_epoch_entry *e; | 330 | struct drbd_epoch_entry *e; |
329 | struct hlist_node *n; | 331 | struct hlist_node *n; |
330 | struct hlist_head *slot; | 332 | struct hlist_head *slot; |
@@ -339,24 +341,23 @@ static int _req_conflicts(struct drbd_request *req) | |||
339 | goto out_no_conflict; | 341 | goto out_no_conflict; |
340 | BUG_ON(mdev->tl_hash == NULL); | 342 | BUG_ON(mdev->tl_hash == NULL); |
341 | 343 | ||
342 | #define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) | 344 | i = drbd_find_overlap(&mdev->write_requests, sector, size); |
343 | slot = tl_hash_slot(mdev, sector); | 345 | if (i) { |
344 | hlist_for_each_entry(i, n, slot, collision) { | 346 | struct drbd_request *req2 = |
345 | if (OVERLAPS) { | 347 | container_of(i, struct drbd_request, i); |
346 | dev_alert(DEV, "%s[%u] Concurrent local write detected! " | 348 | |
347 | "[DISCARD L] new: %llus +%u; " | 349 | dev_alert(DEV, "%s[%u] Concurrent local write detected! " |
348 | "pending: %llus +%u\n", | 350 | "[DISCARD L] new: %llus +%u; " |
349 | current->comm, current->pid, | 351 | "pending: %llus +%u\n", |
350 | (unsigned long long)sector, size, | 352 | current->comm, current->pid, |
351 | (unsigned long long)i->i.sector, i->i.size); | 353 | (unsigned long long)sector, size, |
352 | goto out_conflict; | 354 | (unsigned long long)req2->i.sector, req2->i.size); |
353 | } | 355 | goto out_conflict; |
354 | } | 356 | } |
355 | 357 | ||
356 | if (mdev->ee_hash_s) { | 358 | if (mdev->ee_hash_s) { |
357 | /* now, check for overlapping requests with remote origin */ | 359 | /* now, check for overlapping requests with remote origin */ |
358 | BUG_ON(mdev->ee_hash == NULL); | 360 | BUG_ON(mdev->ee_hash == NULL); |
359 | #undef OVERLAPS | ||
360 | #define OVERLAPS overlaps(e->sector, e->size, sector, size) | 361 | #define OVERLAPS overlaps(e->sector, e->size, sector, size) |
361 | slot = ee_hash_slot(mdev, sector); | 362 | slot = ee_hash_slot(mdev, sector); |
362 | hlist_for_each_entry(e, n, slot, collision) { | 363 | hlist_for_each_entry(e, n, slot, collision) { |
@@ -509,6 +510,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
509 | 510 | ||
510 | hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); | 511 | hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); |
511 | /* corresponding hlist_del is in _req_may_be_done() */ | 512 | /* corresponding hlist_del is in _req_may_be_done() */ |
513 | drbd_insert_interval(&mdev->write_requests, &req->i); | ||
512 | 514 | ||
513 | /* NOTE | 515 | /* NOTE |
514 | * In case the req ended up on the transfer log before being | 516 | * In case the req ended up on the transfer log before being |
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 2520186c4c2b..6f11624cce38 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -275,6 +275,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, | |||
275 | req->i.sector = bio_src->bi_sector; | 275 | req->i.sector = bio_src->bi_sector; |
276 | req->i.size = bio_src->bi_size; | 276 | req->i.size = bio_src->bi_size; |
277 | INIT_HLIST_NODE(&req->collision); | 277 | INIT_HLIST_NODE(&req->collision); |
278 | drbd_clear_interval(&req->i); | ||
278 | INIT_LIST_HEAD(&req->tl_requests); | 279 | INIT_LIST_HEAD(&req->tl_requests); |
279 | INIT_LIST_HEAD(&req->w.list); | 280 | INIT_LIST_HEAD(&req->w.list); |
280 | } | 281 | } |