aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorKent Overstreet <kmo@daterainc.com>2013-09-10 22:07:00 -0400
committerKent Overstreet <kmo@daterainc.com>2013-11-11 00:56:37 -0500
commita1f0358b2bf69be216cb6e4ea40fe7ae4d38b8a6 (patch)
tree287cf5c3daa6ae4ec2ffc55b33b6df52c617f7f7 /drivers/md
parent8835c1234dd9a838993a2d5cb7572f57992ebbee (diff)
bcache: Incremental gc
Big garbage collection rewrite; now, garbage collection uses the same mechanisms as used elsewhere for inserting/updating btree node pointers, instead of rewriting interior btree nodes in place. This makes the code significantly cleaner and less fragile, and means we can now make garbage collection incremental - it doesn't have to hold a write lock on the root of the btree for the entire duration of garbage collection. This means that there's less of a latency hit for doing garbage collection, which means we can gc more frequently (and do a better job of reclaiming from the cache), and we can coalesce across more btree nodes (improving our space efficiency). Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/bcache.h1
-rw-r--r--drivers/md/bcache/btree.c388
-rw-r--r--drivers/md/bcache/btree.h2
-rw-r--r--drivers/md/bcache/sysfs.c2
4 files changed, 226 insertions, 167 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d03bc6f66493..d6970a651e42 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -477,7 +477,6 @@ struct gc_stat {
477 477
478 size_t nkeys; 478 size_t nkeys;
479 uint64_t data; /* sectors */ 479 uint64_t data; /* sectors */
480 uint64_t dirty; /* sectors */
481 unsigned in_use; /* percent */ 480 unsigned in_use; /* percent */
482}; 481};
483 482
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index a3f8ca4ee6e0..7d283d217438 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1176,12 +1176,10 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
1176 1176
1177#define btree_mark_key(b, k) __bch_btree_mark_key(b->c, b->level, k) 1177#define btree_mark_key(b, k) __bch_btree_mark_key(b->c, b->level, k)
1178 1178
1179static int btree_gc_mark_node(struct btree *b, unsigned *keys, 1179static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
1180 struct gc_stat *gc)
1181{ 1180{
1182 uint8_t stale = 0; 1181 uint8_t stale = 0;
1183 unsigned last_dev = -1; 1182 unsigned keys = 0, good_keys = 0;
1184 struct bcache_device *d = NULL;
1185 struct bkey *k; 1183 struct bkey *k;
1186 struct btree_iter iter; 1184 struct btree_iter iter;
1187 struct bset_tree *t; 1185 struct bset_tree *t;
@@ -1189,27 +1187,17 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys,
1189 gc->nodes++; 1187 gc->nodes++;
1190 1188
1191 for_each_key_filter(b, k, &iter, bch_ptr_invalid) { 1189 for_each_key_filter(b, k, &iter, bch_ptr_invalid) {
1192 if (last_dev != KEY_INODE(k)) {
1193 last_dev = KEY_INODE(k);
1194
1195 d = KEY_INODE(k) < b->c->nr_uuids
1196 ? b->c->devices[last_dev]
1197 : NULL;
1198 }
1199
1200 stale = max(stale, btree_mark_key(b, k)); 1190 stale = max(stale, btree_mark_key(b, k));
1191 keys++;
1201 1192
1202 if (bch_ptr_bad(b, k)) 1193 if (bch_ptr_bad(b, k))
1203 continue; 1194 continue;
1204 1195
1205 *keys += bkey_u64s(k);
1206
1207 gc->key_bytes += bkey_u64s(k); 1196 gc->key_bytes += bkey_u64s(k);
1208 gc->nkeys++; 1197 gc->nkeys++;
1198 good_keys++;
1209 1199
1210 gc->data += KEY_SIZE(k); 1200 gc->data += KEY_SIZE(k);
1211 if (KEY_DIRTY(k))
1212 gc->dirty += KEY_SIZE(k);
1213 } 1201 }
1214 1202
1215 for (t = b->sets; t <= &b->sets[b->nsets]; t++) 1203 for (t = b->sets; t <= &b->sets[b->nsets]; t++)
@@ -1218,79 +1206,74 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys,
1218 bkey_cmp(&b->key, &t->end) < 0, 1206 bkey_cmp(&b->key, &t->end) < 0,
1219 b, "found short btree key in gc"); 1207 b, "found short btree key in gc");
1220 1208
1221 return stale; 1209 if (b->c->gc_always_rewrite)
1222} 1210 return true;
1223 1211
1224static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k) 1212 if (stale > 10)
1225{ 1213 return true;
1226 /*
1227 * We block priorities from being written for the duration of garbage
1228 * collection, so we can't sleep in btree_alloc() ->
1229 * bch_bucket_alloc_set(), or we'd risk deadlock - so we don't pass it
1230 * our closure.
1231 */
1232 struct btree *n = btree_node_alloc_replacement(b);
1233
1234 if (!IS_ERR_OR_NULL(n)) {
1235 swap(b, n);
1236 1214
1237 memcpy(k->ptr, b->key.ptr, 1215 if ((keys - good_keys) * 2 > keys)
1238 sizeof(uint64_t) * KEY_PTRS(&b->key)); 1216 return true;
1239
1240 btree_node_free(n);
1241 up_write(&n->lock);
1242 }
1243 1217
1244 return b; 1218 return false;
1245} 1219}
1246 1220
1247/* 1221#define GC_MERGE_NODES 4U
1248 * Leaving this at 2 until we've got incremental garbage collection done; it
1249 * could be higher (and has been tested with 4) except that garbage collection
1250 * could take much longer, adversely affecting latency.
1251 */
1252#define GC_MERGE_NODES 2U
1253 1222
1254struct gc_merge_info { 1223struct gc_merge_info {
1255 struct btree *b; 1224 struct btree *b;
1256 struct bkey *k;
1257 unsigned keys; 1225 unsigned keys;
1258}; 1226};
1259 1227
1260static void btree_gc_coalesce(struct btree *b, struct gc_stat *gc, 1228static int bch_btree_insert_node(struct btree *, struct btree_op *,
1261 struct gc_merge_info *r) 1229 struct keylist *, atomic_t *, struct bkey *);
1230
1231static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1232 struct keylist *keylist, struct gc_stat *gc,
1233 struct gc_merge_info *r)
1262{ 1234{
1263 unsigned nodes = 0, keys = 0, blocks; 1235 unsigned i, nodes = 0, keys = 0, blocks;
1264 int i; 1236 struct btree *new_nodes[GC_MERGE_NODES];
1265 struct closure cl; 1237 struct closure cl;
1238 struct bkey *k;
1266 1239
1240 memset(new_nodes, 0, sizeof(new_nodes));
1267 closure_init_stack(&cl); 1241 closure_init_stack(&cl);
1268 1242
1269 while (nodes < GC_MERGE_NODES && r[nodes].b) 1243 while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
1270 keys += r[nodes++].keys; 1244 keys += r[nodes++].keys;
1271 1245
1272 blocks = btree_default_blocks(b->c) * 2 / 3; 1246 blocks = btree_default_blocks(b->c) * 2 / 3;
1273 1247
1274 if (nodes < 2 || 1248 if (nodes < 2 ||
1275 __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1)) 1249 __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1))
1276 return; 1250 return 0;
1277
1278 for (i = nodes - 1; i >= 0; --i) {
1279 if (r[i].b->written)
1280 r[i].b = btree_gc_alloc(r[i].b, r[i].k);
1281 1251
1282 if (r[i].b->written) 1252 for (i = 0; i < nodes; i++) {
1283 return; 1253 new_nodes[i] = btree_node_alloc_replacement(r[i].b);
1254 if (IS_ERR_OR_NULL(new_nodes[i]))
1255 goto out_nocoalesce;
1284 } 1256 }
1285 1257
1286 for (i = nodes - 1; i > 0; --i) { 1258 for (i = nodes - 1; i > 0; --i) {
1287 struct bset *n1 = r[i].b->sets->data; 1259 struct bset *n1 = new_nodes[i]->sets->data;
1288 struct bset *n2 = r[i - 1].b->sets->data; 1260 struct bset *n2 = new_nodes[i - 1]->sets->data;
1289 struct bkey *k, *last = NULL; 1261 struct bkey *k, *last = NULL;
1290 1262
1291 keys = 0; 1263 keys = 0;
1292 1264
1293 if (i == 1) { 1265 if (i > 1) {
1266 for (k = n2->start;
1267 k < end(n2);
1268 k = bkey_next(k)) {
1269 if (__set_blocks(n1, n1->keys + keys +
1270 bkey_u64s(k), b->c) > blocks)
1271 break;
1272
1273 last = k;
1274 keys += bkey_u64s(k);
1275 }
1276 } else {
1294 /* 1277 /*
1295 * Last node we're not getting rid of - we're getting 1278 * Last node we're not getting rid of - we're getting
1296 * rid of the node at r[0]. Have to try and fit all of 1279 * rid of the node at r[0]. Have to try and fit all of
@@ -1299,37 +1282,27 @@ static void btree_gc_coalesce(struct btree *b, struct gc_stat *gc,
1299 * length keys (shouldn't be possible in practice, 1282 * length keys (shouldn't be possible in practice,
1300 * though) 1283 * though)
1301 */ 1284 */
1302 if (__set_blocks(n1, n1->keys + r->keys, 1285 if (__set_blocks(n1, n1->keys + n2->keys,
1303 b->c) > btree_blocks(r[i].b)) 1286 b->c) > btree_blocks(new_nodes[i]))
1304 return; 1287 goto out_nocoalesce;
1305 1288
1306 keys = n2->keys; 1289 keys = n2->keys;
1290 /* Take the key of the node we're getting rid of */
1307 last = &r->b->key; 1291 last = &r->b->key;
1308 } else 1292 }
1309 for (k = n2->start;
1310 k < end(n2);
1311 k = bkey_next(k)) {
1312 if (__set_blocks(n1, n1->keys + keys +
1313 bkey_u64s(k), b->c) > blocks)
1314 break;
1315
1316 last = k;
1317 keys += bkey_u64s(k);
1318 }
1319 1293
1320 BUG_ON(__set_blocks(n1, n1->keys + keys, 1294 BUG_ON(__set_blocks(n1, n1->keys + keys,
1321 b->c) > btree_blocks(r[i].b)); 1295 b->c) > btree_blocks(new_nodes[i]));
1322 1296
1323 if (last) { 1297 if (last)
1324 bkey_copy_key(&r[i].b->key, last); 1298 bkey_copy_key(&new_nodes[i]->key, last);
1325 bkey_copy_key(r[i].k, last);
1326 }
1327 1299
1328 memcpy(end(n1), 1300 memcpy(end(n1),
1329 n2->start, 1301 n2->start,
1330 (void *) node(n2, keys) - (void *) n2->start); 1302 (void *) node(n2, keys) - (void *) n2->start);
1331 1303
1332 n1->keys += keys; 1304 n1->keys += keys;
1305 r[i].keys = n1->keys;
1333 1306
1334 memmove(n2->start, 1307 memmove(n2->start,
1335 node(n2, keys), 1308 node(n2, keys),
@@ -1337,93 +1310,175 @@ static void btree_gc_coalesce(struct btree *b, struct gc_stat *gc,
1337 1310
1338 n2->keys -= keys; 1311 n2->keys -= keys;
1339 1312
1340 r[i].keys = n1->keys; 1313 if (bch_keylist_realloc(keylist,
1341 r[i - 1].keys = n2->keys; 1314 KEY_PTRS(&new_nodes[i]->key), b->c))
1315 goto out_nocoalesce;
1316
1317 bch_btree_node_write(new_nodes[i], &cl);
1318 bch_keylist_add(keylist, &new_nodes[i]->key);
1342 } 1319 }
1343 1320
1344 btree_node_free(r->b); 1321 for (i = 0; i < nodes; i++) {
1345 up_write(&r->b->lock); 1322 if (bch_keylist_realloc(keylist, KEY_PTRS(&r[i].b->key), b->c))
1323 goto out_nocoalesce;
1346 1324
1347 trace_bcache_btree_gc_coalesce(nodes); 1325 make_btree_freeing_key(r[i].b, keylist->top);
1326 bch_keylist_push(keylist);
1327 }
1328
1329 /* We emptied out this node */
1330 BUG_ON(new_nodes[0]->sets->data->keys);
1331 btree_node_free(new_nodes[0]);
1332 rw_unlock(true, new_nodes[0]);
1348 1333
1334 closure_sync(&cl);
1335
1336 for (i = 0; i < nodes; i++) {
1337 btree_node_free(r[i].b);
1338 rw_unlock(true, r[i].b);
1339
1340 r[i].b = new_nodes[i];
1341 }
1342
1343 bch_btree_insert_node(b, op, keylist, NULL, NULL);
1344 BUG_ON(!bch_keylist_empty(keylist));
1345
1346 memmove(r, r + 1, sizeof(r[0]) * (nodes - 1));
1347 r[nodes - 1].b = ERR_PTR(-EINTR);
1348
1349 trace_bcache_btree_gc_coalesce(nodes);
1349 gc->nodes--; 1350 gc->nodes--;
1350 nodes--;
1351 1351
1352 memmove(&r[0], &r[1], sizeof(struct gc_merge_info) * nodes); 1352 /* Invalidated our iterator */
1353 memset(&r[nodes], 0, sizeof(struct gc_merge_info)); 1353 return -EINTR;
1354
1355out_nocoalesce:
1356 closure_sync(&cl);
1357
1358 while ((k = bch_keylist_pop(keylist)))
1359 if (!bkey_cmp(k, &ZERO_KEY))
1360 atomic_dec(&b->c->prio_blocked);
1361
1362 for (i = 0; i < nodes; i++)
1363 if (!IS_ERR_OR_NULL(new_nodes[i])) {
1364 btree_node_free(new_nodes[i]);
1365 rw_unlock(true, new_nodes[i]);
1366 }
1367 return 0;
1354} 1368}
1355 1369
1356static int btree_gc_recurse(struct btree *b, struct btree_op *op, 1370static unsigned btree_gc_count_keys(struct btree *b)
1357 struct closure *writes, struct gc_stat *gc)
1358{ 1371{
1359 void write(struct btree *r) 1372 struct bkey *k;
1360 { 1373 struct btree_iter iter;
1361 if (!r->written || btree_node_dirty(r)) 1374 unsigned ret = 0;
1362 bch_btree_node_write(r, writes);
1363 1375
1364 up_write(&r->lock); 1376 for_each_key_filter(b, k, &iter, bch_ptr_bad)
1365 } 1377 ret += bkey_u64s(k);
1366 1378
1367 int ret = 0, stale; 1379 return ret;
1380}
1381
1382static int btree_gc_recurse(struct btree *b, struct btree_op *op,
1383 struct closure *writes, struct gc_stat *gc)
1384{
1368 unsigned i; 1385 unsigned i;
1386 int ret = 0;
1387 bool should_rewrite;
1388 struct btree *n;
1389 struct bkey *k;
1390 struct keylist keys;
1391 struct btree_iter iter;
1369 struct gc_merge_info r[GC_MERGE_NODES]; 1392 struct gc_merge_info r[GC_MERGE_NODES];
1393 struct gc_merge_info *last = r + GC_MERGE_NODES - 1;
1370 1394
1371 memset(r, 0, sizeof(r)); 1395 bch_keylist_init(&keys);
1396 bch_btree_iter_init(b, &iter, &b->c->gc_done);
1372 1397
1373 while ((r->k = bch_next_recurse_key(b, &b->c->gc_done))) { 1398 for (i = 0; i < GC_MERGE_NODES; i++)
1374 r->b = bch_btree_node_get(b->c, r->k, b->level - 1, true); 1399 r[i].b = ERR_PTR(-EINTR);
1375 1400
1376 if (IS_ERR(r->b)) { 1401 while (1) {
1377 ret = PTR_ERR(r->b); 1402 k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
1378 break; 1403 if (k) {
1404 r->b = bch_btree_node_get(b->c, k, b->level - 1, true);
1405 if (IS_ERR(r->b)) {
1406 ret = PTR_ERR(r->b);
1407 break;
1408 }
1409
1410 r->keys = btree_gc_count_keys(r->b);
1411
1412 ret = btree_gc_coalesce(b, op, &keys, gc, r);
1413 if (ret)
1414 break;
1379 } 1415 }
1380 1416
1381 r->keys = 0; 1417 if (!last->b)
1382 stale = btree_gc_mark_node(r->b, &r->keys, gc); 1418 break;
1383 1419
1384 if (!b->written && 1420 if (!IS_ERR(last->b)) {
1385 (r->b->level || stale > 10 || 1421 should_rewrite = btree_gc_mark_node(last->b, gc);
1386 b->c->gc_always_rewrite)) 1422 if (should_rewrite) {
1387 r->b = btree_gc_alloc(r->b, r->k); 1423 n = btree_node_alloc_replacement(last->b);
1388 1424
1389 if (r->b->level) 1425 if (!IS_ERR_OR_NULL(n)) {
1390 ret = btree_gc_recurse(r->b, op, writes, gc); 1426 bch_btree_node_write_sync(n);
1427 bch_keylist_add(&keys, &n->key);
1391 1428
1392 if (ret) { 1429 make_btree_freeing_key(last->b,
1393 write(r->b); 1430 keys.top);
1394 break; 1431 bch_keylist_push(&keys);
1395 } 1432
1433 btree_node_free(last->b);
1396 1434
1397 bkey_copy_key(&b->c->gc_done, r->k); 1435 bch_btree_insert_node(b, op, &keys,
1436 NULL, NULL);
1437 BUG_ON(!bch_keylist_empty(&keys));
1398 1438
1399 if (!b->written) 1439 rw_unlock(true, last->b);
1400 btree_gc_coalesce(b, gc, r); 1440 last->b = n;
1441
1442 /* Invalidated our iterator */
1443 ret = -EINTR;
1444 break;
1445 }
1446 }
1401 1447
1402 if (r[GC_MERGE_NODES - 1].b) 1448 if (last->b->level) {
1403 write(r[GC_MERGE_NODES - 1].b); 1449 ret = btree_gc_recurse(last->b, op, writes, gc);
1450 if (ret)
1451 break;
1452 }
1404 1453
1405 memmove(&r[1], &r[0], 1454 bkey_copy_key(&b->c->gc_done, &last->b->key);
1406 sizeof(struct gc_merge_info) * (GC_MERGE_NODES - 1)); 1455
1456 /*
1457 * Must flush leaf nodes before gc ends, since replace
1458 * operations aren't journalled
1459 */
1460 if (btree_node_dirty(last->b))
1461 bch_btree_node_write(last->b, writes);
1462 rw_unlock(true, last->b);
1463 }
1464
1465 memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
1466 r->b = NULL;
1407 1467
1408 /* When we've got incremental GC working, we'll want to do
1409 * if (should_resched())
1410 * return -EAGAIN;
1411 */
1412 cond_resched();
1413#if 0
1414 if (need_resched()) { 1468 if (need_resched()) {
1415 ret = -EAGAIN; 1469 ret = -EAGAIN;
1416 break; 1470 break;
1417 } 1471 }
1418#endif
1419 } 1472 }
1420 1473
1421 for (i = 1; i < GC_MERGE_NODES && r[i].b; i++) 1474 for (i = 0; i < GC_MERGE_NODES; i++)
1422 write(r[i].b); 1475 if (!IS_ERR_OR_NULL(r[i].b)) {
1476 if (btree_node_dirty(r[i].b))
1477 bch_btree_node_write(r[i].b, writes);
1478 rw_unlock(true, r[i].b);
1479 }
1423 1480
1424 /* Might have freed some children, must remove their keys */ 1481 bch_keylist_free(&keys);
1425 if (!b->written)
1426 bch_btree_sort(b);
1427 1482
1428 return ret; 1483 return ret;
1429} 1484}
@@ -1432,27 +1487,31 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
1432 struct closure *writes, struct gc_stat *gc) 1487 struct closure *writes, struct gc_stat *gc)
1433{ 1488{
1434 struct btree *n = NULL; 1489 struct btree *n = NULL;
1435 unsigned keys = 0; 1490 int ret = 0;
1436 int ret = 0, stale = btree_gc_mark_node(b, &keys, gc); 1491 bool should_rewrite;
1437 1492
1438 if (b->level || stale > 10) 1493 should_rewrite = btree_gc_mark_node(b, gc);
1494 if (should_rewrite) {
1439 n = btree_node_alloc_replacement(b); 1495 n = btree_node_alloc_replacement(b);
1440 1496
1441 if (!IS_ERR_OR_NULL(n)) 1497 if (!IS_ERR_OR_NULL(n)) {
1442 swap(b, n); 1498 bch_btree_node_write_sync(n);
1443 1499 bch_btree_set_root(n);
1444 if (b->level) 1500 btree_node_free(b);
1445 ret = btree_gc_recurse(b, op, writes, gc); 1501 rw_unlock(true, n);
1446 1502
1447 if (!b->written || btree_node_dirty(b)) 1503 return -EINTR;
1448 bch_btree_node_write_sync(b); 1504 }
1505 }
1449 1506
1450 if (!IS_ERR_OR_NULL(n)) { 1507 if (b->level) {
1451 bch_btree_set_root(b); 1508 ret = btree_gc_recurse(b, op, writes, gc);
1452 btree_node_free(n); 1509 if (ret)
1453 rw_unlock(true, b); 1510 return ret;
1454 } 1511 }
1455 1512
1513 bkey_copy_key(&b->c->gc_done, &b->key);
1514
1456 return ret; 1515 return ret;
1457} 1516}
1458 1517
@@ -1550,29 +1609,20 @@ static void bch_btree_gc(struct cache_set *c)
1550 1609
1551 btree_gc_start(c); 1610 btree_gc_start(c);
1552 1611
1553 atomic_inc(&c->prio_blocked); 1612 do {
1554 1613 ret = btree_root(gc_root, c, &op, &writes, &stats);
1555 ret = btree_root(gc_root, c, &op, &writes, &stats); 1614 closure_sync(&writes);
1556 closure_sync(&writes);
1557
1558 if (ret) {
1559 pr_warn("gc failed!");
1560 return;
1561 }
1562 1615
1563 /* Possibly wait for new UUIDs or whatever to hit disk */ 1616 if (ret && ret != -EAGAIN)
1564 bch_journal_meta(c, &writes); 1617 pr_warn("gc failed!");
1565 closure_sync(&writes); 1618 } while (ret);
1566 1619
1567 available = bch_btree_gc_finish(c); 1620 available = bch_btree_gc_finish(c);
1568
1569 atomic_dec(&c->prio_blocked);
1570 wake_up_allocators(c); 1621 wake_up_allocators(c);
1571 1622
1572 bch_time_stats_update(&c->btree_gc_time, start_time); 1623 bch_time_stats_update(&c->btree_gc_time, start_time);
1573 1624
1574 stats.key_bytes *= sizeof(uint64_t); 1625 stats.key_bytes *= sizeof(uint64_t);
1575 stats.dirty <<= 9;
1576 stats.data <<= 9; 1626 stats.data <<= 9;
1577 stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; 1627 stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets;
1578 memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); 1628 memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
@@ -1585,14 +1635,28 @@ static void bch_btree_gc(struct cache_set *c)
1585static int bch_gc_thread(void *arg) 1635static int bch_gc_thread(void *arg)
1586{ 1636{
1587 struct cache_set *c = arg; 1637 struct cache_set *c = arg;
1638 struct cache *ca;
1639 unsigned i;
1588 1640
1589 while (1) { 1641 while (1) {
1642again:
1590 bch_btree_gc(c); 1643 bch_btree_gc(c);
1591 1644
1592 set_current_state(TASK_INTERRUPTIBLE); 1645 set_current_state(TASK_INTERRUPTIBLE);
1593 if (kthread_should_stop()) 1646 if (kthread_should_stop())
1594 break; 1647 break;
1595 1648
1649 mutex_lock(&c->bucket_lock);
1650
1651 for_each_cache(ca, c, i)
1652 if (ca->invalidate_needs_gc) {
1653 mutex_unlock(&c->bucket_lock);
1654 set_current_state(TASK_RUNNING);
1655 goto again;
1656 }
1657
1658 mutex_unlock(&c->bucket_lock);
1659
1596 try_to_freeze(); 1660 try_to_freeze();
1597 schedule(); 1661 schedule();
1598 } 1662 }
@@ -2083,8 +2147,6 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
2083 2147
2084 bch_keylist_init(&split_keys); 2148 bch_keylist_init(&split_keys);
2085 2149
2086 BUG_ON(b->level);
2087
2088 do { 2150 do {
2089 BUG_ON(b->level && replace_key); 2151 BUG_ON(b->level && replace_key);
2090 2152
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index e11bb8571d24..b5a46affe8eb 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -201,7 +201,7 @@ static inline bool bkey_written(struct btree *b, struct bkey *k)
201 201
202static inline void set_gc_sectors(struct cache_set *c) 202static inline void set_gc_sectors(struct cache_set *c)
203{ 203{
204 atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 8); 204 atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
205} 205}
206 206
207static inline struct bkey *bch_btree_iter_init(struct btree *b, 207static inline struct bkey *bch_btree_iter_init(struct btree *b,
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 9687771ec6f3..c5f73e34d016 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -489,7 +489,6 @@ lock_root:
489 489
490 sysfs_print(btree_used_percent, btree_used(c)); 490 sysfs_print(btree_used_percent, btree_used(c));
491 sysfs_print(btree_nodes, c->gc_stats.nodes); 491 sysfs_print(btree_nodes, c->gc_stats.nodes);
492 sysfs_hprint(dirty_data, c->gc_stats.dirty);
493 sysfs_hprint(average_key_size, average_key_size(c)); 492 sysfs_hprint(average_key_size, average_key_size(c));
494 493
495 sysfs_print(cache_read_races, 494 sysfs_print(cache_read_races,
@@ -642,7 +641,6 @@ static struct attribute *bch_cache_set_files[] = {
642 &sysfs_cache_available_percent, 641 &sysfs_cache_available_percent,
643 642
644 &sysfs_average_key_size, 643 &sysfs_average_key_size,
645 &sysfs_dirty_data,
646 644
647 &sysfs_errors, 645 &sysfs_errors,
648 &sysfs_io_error_limit, 646 &sysfs_io_error_limit,