author		Kent Overstreet <kmo@daterainc.com>	2014-03-04 19:42:42 -0500
committer	Kent Overstreet <kmo@daterainc.com>	2014-03-18 15:23:35 -0400
commit		2a285686c109816ba71a00b9278262cf02648258 (patch)
tree		83be424d1b213a72a36de69b7ed98357c28cbfca
parent		05335cff9f01555b769ac97b7bacc472b7ed047a (diff)
bcache: btree locking rework
Add a new lock, b->write_lock, which is required to actually modify - or write -
a btree node; this lock is only held for short durations.
This means we can write out a btree node without taking b->lock, which _is_ held
for long durations - solving a deadlock when btree_flush_write() (from the
journalling code) is called with a btree node locked.
Right now this just occurs in bch_btree_set_root(), but with an upcoming journalling
rework it's going to happen a lot more.
This also means b->lock is now more of a read/intent lock than a read/write
lock - but not completely, since it still blocks readers. It may be turned
into a real intent lock at some point in the future.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
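
The locking rule the patch establishes is: b->write_lock is taken only around the
actual modification or write-out of a node, while b->lock is the long-held traversal
lock that a writer may or may not already hold. Below is a minimal, self-contained
userspace sketch of that discipline, using pthreads in place of the kernel
rwsem/mutex; the node structure and all names are illustrative assumptions, not the
bcache API:

#include <pthread.h>
#include <stdio.h>

struct node {
	pthread_rwlock_t lock;       /* long-held, like b->lock: taken for traversal */
	pthread_mutex_t  write_lock; /* short-held, like b->write_lock: required to modify/write */
	int              dirty;
	int              last_key;
};

/*
 * Write-out path: needs only write_lock, so it can run even while another
 * thread holds the long-held traversal lock; this is what lets a flusher
 * write out a node that a caller already has locked.
 */
static void flush_node(struct node *n)
{
	pthread_mutex_lock(&n->write_lock);
	if (n->dirty) {
		printf("writing out node (last_key=%d)\n", n->last_key);
		n->dirty = 0;
	}
	pthread_mutex_unlock(&n->write_lock);
}

/*
 * Insert path: holds the traversal lock for the whole operation, but
 * write_lock only for the brief window where the node is modified.
 */
static void insert_key(struct node *n, int key)
{
	pthread_rwlock_wrlock(&n->lock);

	pthread_mutex_lock(&n->write_lock);
	n->last_key = key;
	n->dirty = 1;
	pthread_mutex_unlock(&n->write_lock);

	pthread_rwlock_unlock(&n->lock);
}

int main(void)
{
	struct node n = { .dirty = 0, .last_key = 0 };

	pthread_rwlock_init(&n.lock, NULL);
	pthread_mutex_init(&n.write_lock, NULL);

	insert_key(&n, 42);
	flush_node(&n);   /* writes the node without touching n.lock */

	pthread_mutex_destroy(&n.write_lock);
	pthread_rwlock_destroy(&n.lock);
	return 0;
}

Because flush_node() never touches the long-held lock, it is safe to call from a
context that already holds it - which is exactly the property that fixes the
btree_flush_write() deadlock described above.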
-rw-r--r--	drivers/md/bcache/btree.c	| 164
-rw-r--r--	drivers/md/bcache/btree.h	|   3
-rw-r--r--	drivers/md/bcache/journal.c	|   9
-rw-r--r--	drivers/md/bcache/super.c	|   9
4 files changed, 133 insertions(+), 52 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index e83732e2d912..01b1b7e23cf2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -167,6 +167,20 @@ static inline struct bset *write_block(struct btree *b)
 	return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
 }
 
+static void bch_btree_init_next(struct btree *b)
+{
+	/* If not a leaf node, always sort */
+	if (b->level && b->keys.nsets)
+		bch_btree_sort(&b->keys, &b->c->sort);
+	else
+		bch_btree_sort_lazy(&b->keys, &b->c->sort);
+
+	if (b->written < btree_blocks(b))
+		bch_bset_init_next(&b->keys, write_block(b),
+				   bset_magic(&b->c->sb));
+
+}
+
 /* Btree key manipulation */
 
 void bkey_put(struct cache_set *c, struct bkey *k)
@@ -438,10 +452,12 @@ static void do_btree_node_write(struct btree *b)
 	}
 }
 
-void bch_btree_node_write(struct btree *b, struct closure *parent)
+void __bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = btree_bset_last(b);
 
+	lockdep_assert_held(&b->write_lock);
+
 	trace_bcache_btree_write(b);
 
 	BUG_ON(current->bio_list);
@@ -465,23 +481,24 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 		       &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
 
 	b->written += set_blocks(i, block_bytes(b->c));
+}
 
-	/* If not a leaf node, always sort */
-	if (b->level && b->keys.nsets)
-		bch_btree_sort(&b->keys, &b->c->sort);
-	else
-		bch_btree_sort_lazy(&b->keys, &b->c->sort);
+void bch_btree_node_write(struct btree *b, struct closure *parent)
+{
+	unsigned nsets = b->keys.nsets;
+
+	lockdep_assert_held(&b->lock);
+
+	__bch_btree_node_write(b, parent);
 
 	/*
 	 * do verify if there was more than one set initially (i.e. we did a
 	 * sort) and we sorted down to a single set:
 	 */
-	if (i != b->keys.set->data && !b->keys.nsets)
+	if (nsets && !b->keys.nsets)
 		bch_btree_verify(b);
 
-	if (b->written < btree_blocks(b))
-		bch_bset_init_next(&b->keys, write_block(b),
-				   bset_magic(&b->c->sb));
+	bch_btree_init_next(b);
 }
 
 static void bch_btree_node_write_sync(struct btree *b)
@@ -489,7 +506,11 @@ static void bch_btree_node_write_sync(struct btree *b)
 	struct closure cl;
 
 	closure_init_stack(&cl);
+
+	mutex_lock(&b->write_lock);
 	bch_btree_node_write(b, &cl);
+	mutex_unlock(&b->write_lock);
+
 	closure_sync(&cl);
 }
 
@@ -497,11 +518,10 @@ static void btree_node_write_work(struct work_struct *w)
 {
 	struct btree *b = container_of(to_delayed_work(w), struct btree, work);
 
-	rw_lock(true, b, b->level);
-
+	mutex_lock(&b->write_lock);
 	if (btree_node_dirty(b))
-		bch_btree_node_write(b, NULL);
-	rw_unlock(true, b);
+		__bch_btree_node_write(b, NULL);
+	mutex_unlock(&b->write_lock);
 }
 
 static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
@@ -509,6 +529,8 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
 	struct bset *i = btree_bset_last(b);
 	struct btree_write *w = btree_current_write(b);
 
+	lockdep_assert_held(&b->write_lock);
+
 	BUG_ON(!b->written);
 	BUG_ON(!i->keys);
 
@@ -593,6 +615,8 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 
 	init_rwsem(&b->lock);
 	lockdep_set_novalidate_class(&b->lock);
+	mutex_init(&b->write_lock);
+	lockdep_set_novalidate_class(&b->write_lock);
 	INIT_LIST_HEAD(&b->list);
 	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
 	b->c = c;
@@ -626,8 +650,12 @@ static int mca_reap(struct btree *b, unsigned min_order, bool flush)
 		up(&b->io_mutex);
 	}
 
+	mutex_lock(&b->write_lock);
 	if (btree_node_dirty(b))
-		bch_btree_node_write_sync(b);
+		__bch_btree_node_write(b, &cl);
+	mutex_unlock(&b->write_lock);
+
+	closure_sync(&cl);
 
 	/* wait for any in flight btree write */
 	down(&b->io_mutex);
@@ -1010,10 +1038,14 @@ static void btree_node_free(struct btree *b)
 
 	BUG_ON(b == b->c->root);
 
+	mutex_lock(&b->write_lock);
+
 	if (btree_node_dirty(b))
 		btree_complete_write(b, btree_current_write(b));
 	clear_bit(BTREE_NODE_dirty, &b->flags);
 
+	mutex_unlock(&b->write_lock);
+
 	cancel_delayed_work(&b->work);
 
 	mutex_lock(&b->c->bucket_lock);
@@ -1065,8 +1097,10 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
 {
 	struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
 	if (!IS_ERR_OR_NULL(n)) {
+		mutex_lock(&n->write_lock);
 		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
 		bkey_copy_key(&n->key, &b->key);
+		mutex_unlock(&n->write_lock);
 	}
 
 	return n;
@@ -1269,6 +1303,9 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		goto out_nocoalesce;
 	}
 
+	for (i = 0; i < nodes; i++)
+		mutex_lock(&new_nodes[i]->write_lock);
+
 	for (i = nodes - 1; i > 0; --i) {
 		struct bset *n1 = btree_bset_first(new_nodes[i]);
 		struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
@@ -1335,6 +1372,9 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		bch_keylist_add(keylist, &new_nodes[i]->key);
 	}
 
+	for (i = 0; i < nodes; i++)
+		mutex_unlock(&new_nodes[i]->write_lock);
+
 	closure_sync(&cl);
 
 	/* We emptied out this node */
@@ -1399,7 +1439,6 @@ static unsigned btree_gc_count_keys(struct btree *b)
 static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 			    struct closure *writes, struct gc_stat *gc)
 {
-	unsigned i;
 	int ret = 0;
 	bool should_rewrite;
 	struct btree *n;
@@ -1407,13 +1446,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 	struct keylist keys;
 	struct btree_iter iter;
 	struct gc_merge_info r[GC_MERGE_NODES];
-	struct gc_merge_info *last = r + GC_MERGE_NODES - 1;
+	struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
 
 	bch_keylist_init(&keys);
 	bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
 
-	for (i = 0; i < GC_MERGE_NODES; i++)
-		r[i].b = ERR_PTR(-EINTR);
+	for (i = r; i < r + ARRAY_SIZE(r); i++)
+		i->b = ERR_PTR(-EINTR);
 
 	while (1) {
 		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
@@ -1443,6 +1482,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 
 			if (!IS_ERR_OR_NULL(n)) {
 				bch_btree_node_write_sync(n);
+
 				bch_keylist_add(&keys, &n->key);
 
 				make_btree_freeing_key(last->b,
@@ -1475,8 +1515,10 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 			 * Must flush leaf nodes before gc ends, since replace
 			 * operations aren't journalled
 			 */
+			mutex_lock(&last->b->write_lock);
 			if (btree_node_dirty(last->b))
 				bch_btree_node_write(last->b, writes);
+			mutex_unlock(&last->b->write_lock);
 			rw_unlock(true, last->b);
 		}
 
@@ -1489,11 +1531,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 		}
 	}
 
-	for (i = 0; i < GC_MERGE_NODES; i++)
-		if (!IS_ERR_OR_NULL(r[i].b)) {
-			if (btree_node_dirty(r[i].b))
-				bch_btree_node_write(r[i].b, writes);
-			rw_unlock(true, r[i].b);
+	for (i = r; i < r + ARRAY_SIZE(r); i++)
+		if (!IS_ERR_OR_NULL(i->b)) {
+			mutex_lock(&i->b->write_lock);
+			if (btree_node_dirty(i->b))
+				bch_btree_node_write(i->b, writes);
+			mutex_unlock(&i->b->write_lock);
+			rw_unlock(true, i->b);
 		}
 
 	bch_keylist_free(&keys);
@@ -1514,6 +1558,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
 
 	if (!IS_ERR_OR_NULL(n)) {
 		bch_btree_node_write_sync(n);
+
 		bch_btree_set_root(n);
 		btree_node_free(b);
 		rw_unlock(true, n);
@@ -1871,6 +1916,9 @@ static int btree_split(struct btree *b, struct btree_op *op,
 			goto err_free2;
 		}
 
+		mutex_lock(&n1->write_lock);
+		mutex_lock(&n2->write_lock);
+
 		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
 
 		/*
@@ -1897,21 +1945,26 @@
 
 		bch_keylist_add(&parent_keys, &n2->key);
 		bch_btree_node_write(n2, &cl);
+		mutex_unlock(&n2->write_lock);
 		rw_unlock(true, n2);
 	} else {
 		trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);
 
+		mutex_lock(&n1->write_lock);
 		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
 	}
 
 	bch_keylist_add(&parent_keys, &n1->key);
 	bch_btree_node_write(n1, &cl);
+	mutex_unlock(&n1->write_lock);
 
 	if (n3) {
 		/* Depth increases, make a new root */
+		mutex_lock(&n3->write_lock);
 		bkey_copy_key(&n3->key, &MAX_KEY);
 		bch_btree_insert_keys(n3, op, &parent_keys, NULL);
 		bch_btree_node_write(n3, &cl);
+		mutex_unlock(&n3->write_lock);
 
 		closure_sync(&cl);
 		bch_btree_set_root(n3);
@@ -1960,33 +2013,54 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
 				 atomic_t *journal_ref,
 				 struct bkey *replace_key)
 {
+	struct closure cl;
+
 	BUG_ON(b->level && replace_key);
 
+	closure_init_stack(&cl);
+
+	mutex_lock(&b->write_lock);
+
+	if (write_block(b) != btree_bset_last(b) &&
+	    b->keys.last_set_unwritten)
+		bch_btree_init_next(b); /* just wrote a set */
+
 	if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) {
-		if (current->bio_list) {
-			op->lock = b->c->root->level + 1;
-			return -EAGAIN;
-		} else if (op->lock <= b->c->root->level) {
-			op->lock = b->c->root->level + 1;
-			return -EINTR;
-		} else {
-			/* Invalidated all iterators */
-			int ret = btree_split(b, op, insert_keys, replace_key);
+		mutex_unlock(&b->write_lock);
+		goto split;
+	}
 
-			return bch_keylist_empty(insert_keys) ?
-				0 : ret ?: -EINTR;
-		}
-	} else {
-		BUG_ON(write_block(b) != btree_bset_last(b));
+	BUG_ON(write_block(b) != btree_bset_last(b));
 
-		if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
-			if (!b->level)
-				bch_btree_leaf_dirty(b, journal_ref);
-			else
-				bch_btree_node_write_sync(b);
-		}
+	if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
+		if (!b->level)
+			bch_btree_leaf_dirty(b, journal_ref);
+		else
+			bch_btree_node_write(b, &cl);
+	}
 
-		return 0;
+	mutex_unlock(&b->write_lock);
+
+	/* wait for btree node write if necessary, after unlock */
+	closure_sync(&cl);
+
+	return 0;
+split:
+	if (current->bio_list) {
+		op->lock = b->c->root->level + 1;
+		return -EAGAIN;
+	} else if (op->lock <= b->c->root->level) {
+		op->lock = b->c->root->level + 1;
+		return -EINTR;
+	} else {
+		/* Invalidated all iterators */
+		int ret = btree_split(b, op, insert_keys, replace_key);
+
+		if (bch_keylist_empty(insert_keys))
+			return 0;
+		else if (!ret)
+			return -EINTR;
+		return ret;
 	}
 }
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index def9dc4a822f..acebf26809cc 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -127,6 +127,8 @@ struct btree {
 	struct cache_set	*c;
 	struct btree		*parent;
 
+	struct mutex		write_lock;
+
 	unsigned long		flags;
 	uint16_t		written;	/* would be nice to kill */
 	uint8_t			level;
@@ -236,6 +238,7 @@ static inline void rw_unlock(bool w, struct btree *b)
 }
 
 void bch_btree_node_read_done(struct btree *);
+void __bch_btree_node_write(struct btree *, struct closure *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_btree_set_root(struct btree *);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c8bfc28cd2bd..59e82021b5bb 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -381,16 +381,15 @@ retry:
 
 	b = best;
 	if (b) {
-		rw_lock(true, b, b->level);
-
+		mutex_lock(&b->write_lock);
 		if (!btree_current_write(b)->journal) {
-			rw_unlock(true, b);
+			mutex_unlock(&b->write_lock);
 			/* We raced */
 			goto retry;
 		}
 
-		bch_btree_node_write(b, NULL);
-		rw_unlock(true, b);
+		__bch_btree_node_write(b, NULL);
+		mutex_unlock(&b->write_lock);
 	}
 }
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index ddfde380b49f..9ded06434e11 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1398,9 +1398,12 @@ static void cache_set_flush(struct closure *cl)
 		list_add(&c->root->list, &c->btree_cache);
 
 	/* Should skip this if we're unregistering because of an error */
-	list_for_each_entry(b, &c->btree_cache, list)
+	list_for_each_entry(b, &c->btree_cache, list) {
+		mutex_lock(&b->write_lock);
 		if (btree_node_dirty(b))
-			bch_btree_node_write(b, NULL);
+			__bch_btree_node_write(b, NULL);
+		mutex_unlock(&b->write_lock);
+	}
 
 	for_each_cache(ca, c, i)
 		if (ca->alloc_thread)
@@ -1667,8 +1670,10 @@ static void run_cache_set(struct cache_set *c)
 	if (IS_ERR_OR_NULL(c->root))
 		goto err;
 
+	mutex_lock(&c->root->write_lock);
 	bkey_copy_key(&c->root->key, &MAX_KEY);
 	bch_btree_node_write(c->root, &cl);
+	mutex_unlock(&c->root->write_lock);
 
 	bch_btree_set_root(c->root);
 	rw_unlock(true, c->root);