author		Kent Overstreet <kmo@daterainc.com>	2014-03-17 21:22:34 -0400
committer	Kent Overstreet <kmo@daterainc.com>	2014-03-18 15:23:34 -0400
commit		05335cff9f01555b769ac97b7bacc472b7ed047a (patch)
tree		58e0a080e662b4b932a8b7bc678b5ed882a2d382 /drivers/md
parent		4fe6a816707aace9e8e297b708411c5930537793 (diff)
bcache: Fix a race when freeing btree nodes
This isn't a bulletproof fix; btree_node_free() -> bch_bucket_free() puts the
bucket on the unused freelist, where it can be reused right away without any
ordering requirements. It would be better to wait on at least a journal write
to go down before reusing the bucket. bch_btree_set_root() does this, and
inserting into non-leaf nodes is completely synchronous, so we should be ok,
but future patches are just going to get rid of the unused freelist - it was
needed in the past for various reasons but shouldn't be needed anymore.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
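
The race is easiest to see in miniature. Below is a userspace toy of the
reuse hazard the message describes (hypothetical names, not bcache code):
a one-slot freelist stands in for the unused freelist, so a freed bucket
is handed out again immediately, and a stale pointer into it reads
whatever the new owner wrote.

#include <stdio.h>
#include <string.h>

/*
 * Toy of the reuse race: a one-slot "freelist" means a freed bucket
 * can be reallocated right away, with no ordering against anything
 * still pointing at it.
 */
static char bucket[16];
static int bucket_is_free = 1;

static char *alloc_bucket(void)
{
	bucket_is_free = 0;
	return bucket;
}

static void free_bucket(char *b)
{
	(void)b;
	bucket_is_free = 1;	/* reusable right away */
}

int main(void)
{
	char *node = alloc_bucket();
	strcpy(node, "btree node A");

	char *stale = node;	/* a pointer the parent still holds */

	/* Buggy ordering: free before the stale pointer is replaced. */
	free_bucket(node);
	strcpy(alloc_bucket(), "unrelated data");	/* instant reuse */

	/* A traversal through the stale pointer now reads the reused
	 * bucket's contents. */
	printf("traversal sees: %s\n", stale);
	return 0;
}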
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/bcache/btree.c	53
1 file changed, 20 insertions(+), 33 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 1672db348c8b..e83732e2d912 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1006,8 +1006,6 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
 
 static void btree_node_free(struct btree *b)
 {
-	unsigned i;
-
 	trace_bcache_btree_node_free(b);
 
 	BUG_ON(b == b->c->root);
@@ -1019,14 +1017,6 @@ static void btree_node_free(struct btree *b)
 	cancel_delayed_work(&b->work);
 
 	mutex_lock(&b->c->bucket_lock);
-
-	for (i = 0; i < KEY_PTRS(&b->key); i++) {
-		BUG_ON(atomic_read(&PTR_BUCKET(b->c, &b->key, i)->pin));
-
-		bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
-			    PTR_BUCKET(b->c, &b->key, i));
-	}
-
 	bch_bucket_free(b->c, &b->key);
 	mca_bucket_free(b);
 	mutex_unlock(&b->c->bucket_lock);
@@ -1086,16 +1076,19 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
 {
 	unsigned i;
 
+	mutex_lock(&b->c->bucket_lock);
+
+	atomic_inc(&b->c->prio_blocked);
+
 	bkey_copy(k, &b->key);
 	bkey_copy_key(k, &ZERO_KEY);
 
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		uint8_t g = PTR_BUCKET(b->c, k, i)->gen + 1;
-
-		SET_PTR_GEN(k, i, g);
-	}
+	for (i = 0; i < KEY_PTRS(k); i++)
+		SET_PTR_GEN(k, i,
+			    bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
+					PTR_BUCKET(b->c, &b->key, i)));
 
-	atomic_inc(&b->c->prio_blocked);
+	mutex_unlock(&b->c->bucket_lock);
 }
 
 static int btree_check_reserve(struct btree *b, struct btree_op *op)
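
The two hunks above are two halves of one move: btree_node_free() no
longer bumps bucket generations, and make_btree_freeing_key() now does
the bump under bucket_lock, writing bch_inc_gen()'s return value
straight into the freeing key. A minimal userspace sketch of that shape
(stand-in types and names, not bcache's):

#include <stdint.h>
#include <stdio.h>
#include <pthread.h>

struct bucket { uint8_t gen; };

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

/* like bch_inc_gen(): bump the bucket's gen and return the new value */
static uint8_t inc_gen(struct bucket *b)
{
	return ++b->gen;
}

static void make_freeing_key(struct bucket *bkts, unsigned nptrs,
			     uint8_t *key_gens)
{
	unsigned i;

	/* Holding the lock keeps the bump and the key's view of it
	 * consistent with concurrent allocator activity. */
	pthread_mutex_lock(&bucket_lock);
	for (i = 0; i < nptrs; i++)
		key_gens[i] = inc_gen(&bkts[i]);
	pthread_mutex_unlock(&bucket_lock);
}

int main(void)
{
	struct bucket b[3] = { {1}, {7}, {42} };
	uint8_t gens[3];

	make_freeing_key(b, 3, gens);
	printf("%d %d %d\n", gens[0], gens[1], gens[2]);	/* 2 8 43 */
	return 0;
}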
@@ -1342,6 +1335,13 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		bch_keylist_add(keylist, &new_nodes[i]->key);
 	}
 
+	closure_sync(&cl);
+
+	/* We emptied out this node */
+	BUG_ON(btree_bset_first(new_nodes[0])->keys);
+	btree_node_free(new_nodes[0]);
+	rw_unlock(true, new_nodes[0]);
+
 	for (i = 0; i < nodes; i++) {
 		if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key)))
 			goto out_nocoalesce;
@@ -1350,12 +1350,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		bch_keylist_push(keylist);
 	}
 
-	/* We emptied out this node */
-	BUG_ON(btree_bset_first(new_nodes[0])->keys);
-	btree_node_free(new_nodes[0]);
-	rw_unlock(true, new_nodes[0]);
-
-	closure_sync(&cl);
+	bch_btree_insert_node(b, op, keylist, NULL, NULL);
+	BUG_ON(!bch_keylist_empty(keylist));
 
 	for (i = 0; i < nodes; i++) {
 		btree_node_free(r[i].b);
@@ -1364,9 +1360,6 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		r[i].b = new_nodes[i];
 	}
 
-	bch_btree_insert_node(b, op, keylist, NULL, NULL);
-	BUG_ON(!bch_keylist_empty(keylist));
-
 	memmove(r, r + 1, sizeof(r[0]) * (nodes - 1));
 	r[nodes - 1].b = ERR_PTR(-EINTR);
 
@@ -1456,12 +1449,11 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 					     keys.top);
 			bch_keylist_push(&keys);
 
-			btree_node_free(last->b);
-
 			bch_btree_insert_node(b, op, &keys,
 					      NULL, NULL);
 			BUG_ON(!bch_keylist_empty(&keys));
 
+			btree_node_free(last->b);
 			rw_unlock(true, last->b);
 			last->b = n;
 
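
The coalesce and gc_recurse hunks above all enforce the same ordering
rule: bch_btree_insert_node() must make the replacement visible in the
parent before btree_node_free() may recycle the old node's bucket. A
minimal publish-then-retire toy (hypothetical types, not bcache's):

#include <assert.h>

struct node { int live; };
struct parent { struct node *child; };

static void publish(struct parent *p, struct node *replacement)
{
	p->child = replacement;	/* the bch_btree_insert_node() step */
}

static void retire(struct node *n)
{
	n->live = 0;		/* the btree_node_free() step: reusable now */
}

int main(void)
{
	struct node old = { 1 }, replacement = { 1 };
	struct parent p = { &old };

	publish(&p, &replacement);	/* first: make new node visible */
	retire(&old);			/* then: recycle the old one */
	assert(p.child->live);		/* readers never see a dead node */
	return 0;
}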
@@ -1924,26 +1916,21 @@ static int btree_split(struct btree *b, struct btree_op *op,
 		closure_sync(&cl);
 		bch_btree_set_root(n3);
 		rw_unlock(true, n3);
-
-		btree_node_free(b);
 	} else if (!b->parent) {
 		/* Root filled up but didn't need to be split */
 		closure_sync(&cl);
 		bch_btree_set_root(n1);
-
-		btree_node_free(b);
 	} else {
 		/* Split a non root node */
 		closure_sync(&cl);
 		make_btree_freeing_key(b, parent_keys.top);
 		bch_keylist_push(&parent_keys);
 
-		btree_node_free(b);
-
 		bch_btree_insert_node(b->parent, op, &parent_keys, NULL, NULL);
 		BUG_ON(!bch_keylist_empty(&parent_keys));
 	}
 
+	btree_node_free(b);
 	rw_unlock(true, n1);
 
 	bch_time_stats_update(&b->c->btree_split_time, start_time);
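
btree_split() gets the same treatment: each branch used to call
btree_node_free(b) itself, and the non-root branch did so before
bch_btree_insert_node() had unlinked b from its parent. Hoisting the
free below the if/else ladder means it always runs after the branch has
published the replacement. A toy of the restructured control flow (stub
functions, hypothetical names):

#include <stdio.h>

static void publish_new_root(void)    { puts("bch_btree_set_root(new)"); }
static void publish_into_parent(void) { puts("insert keys into parent"); }
static void retire_old_node(void)     { puts("btree_node_free(old)"); }

static void split(int root_split, int root_grew)
{
	if (root_split)
		publish_new_root();
	else if (root_grew)
		publish_new_root();
	else
		publish_into_parent();

	/* Previously each branch retired the old node itself, one of
	 * them before publishing; now there is a single retire point
	 * that always follows the publish above. */
	retire_old_node();
}

int main(void)
{
	split(0, 0);	/* the non-root case the race affected */
	return 0;
}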