aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@fusionio.com> 2013-03-04 17:13:31 -0500
committer: Chris Mason <chris.mason@fusionio.com> 2013-03-07 07:52:40 -0500
commit: de3cb945db4d8eb3b046dc7a5ea89a893372750c (patch)
tree: d3f479c1431a2607d7154d7a1e10878533d38008 /fs
parent: 3a01aa7a25274308fe813a6237f678aed901cea3 (diff)
Btrfs: improve the delayed inode throttling
The delayed inode code batches up changes to the btree in hopes of doing them in bulk. As the changes build up, processes kick off worker threads and wait for them to make progress.

The current code kicks off an async work queue item for each delayed node, which creates a lot of churn. It also uses a fixed 1 HZ waiting period for the throttle, which allows us to build a lot of pending work and can slow down the commit.

This changes us to watch a sequence counter as it is bumped during the operations. We kick off fewer work items and have each work item do more work.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/delayed-inode.c 151
-rw-r--r-- fs/btrfs/delayed-inode.h 2
2 files changed, 92 insertions, 61 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0b278b117cbe..14fce27b4780 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -22,8 +22,9 @@
22#include "disk-io.h" 22#include "disk-io.h"
23#include "transaction.h" 23#include "transaction.h"
24 24
25#define BTRFS_DELAYED_WRITEBACK 400 25#define BTRFS_DELAYED_WRITEBACK 512
26#define BTRFS_DELAYED_BACKGROUND 100 26#define BTRFS_DELAYED_BACKGROUND 128
27#define BTRFS_DELAYED_BATCH 16
27 28
28static struct kmem_cache *delayed_node_cache; 29static struct kmem_cache *delayed_node_cache;
29 30
@@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
494 BTRFS_DELAYED_DELETION_ITEM); 495 BTRFS_DELAYED_DELETION_ITEM);
495} 496}
496 497
498static void finish_one_item(struct btrfs_delayed_root *delayed_root)
499{
500 int seq = atomic_inc_return(&delayed_root->items_seq);
501 if ((atomic_dec_return(&delayed_root->items) <
502 BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
503 waitqueue_active(&delayed_root->wait))
504 wake_up(&delayed_root->wait);
505}
506
497static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) 507static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
498{ 508{
499 struct rb_root *root; 509 struct rb_root *root;
@@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
512 522
513 rb_erase(&delayed_item->rb_node, root); 523 rb_erase(&delayed_item->rb_node, root);
514 delayed_item->delayed_node->count--; 524 delayed_item->delayed_node->count--;
515 if (atomic_dec_return(&delayed_root->items) < 525
516 BTRFS_DELAYED_BACKGROUND && 526 finish_one_item(delayed_root);
517 waitqueue_active(&delayed_root->wait))
518 wake_up(&delayed_root->wait);
519} 527}
520 528
521static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) 529static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
@@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1056 delayed_node->count--; 1064 delayed_node->count--;
1057 1065
1058 delayed_root = delayed_node->root->fs_info->delayed_root; 1066 delayed_root = delayed_node->root->fs_info->delayed_root;
1059 if (atomic_dec_return(&delayed_root->items) < 1067 finish_one_item(delayed_root);
1060 BTRFS_DELAYED_BACKGROUND &&
1061 waitqueue_active(&delayed_root->wait))
1062 wake_up(&delayed_root->wait);
1063 } 1068 }
1064} 1069}
1065 1070
@@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode)
1304 btrfs_release_delayed_node(delayed_node); 1309 btrfs_release_delayed_node(delayed_node);
1305} 1310}
1306 1311
1307struct btrfs_async_delayed_node { 1312struct btrfs_async_delayed_work {
1308 struct btrfs_root *root; 1313 struct btrfs_delayed_root *delayed_root;
1309 struct btrfs_delayed_node *delayed_node; 1314 int nr;
1310 struct btrfs_work work; 1315 struct btrfs_work work;
1311}; 1316};
1312 1317
1313static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) 1318static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1314{ 1319{
1315 struct btrfs_async_delayed_node *async_node; 1320 struct btrfs_async_delayed_work *async_work;
1321 struct btrfs_delayed_root *delayed_root;
1316 struct btrfs_trans_handle *trans; 1322 struct btrfs_trans_handle *trans;
1317 struct btrfs_path *path; 1323 struct btrfs_path *path;
1318 struct btrfs_delayed_node *delayed_node = NULL; 1324 struct btrfs_delayed_node *delayed_node = NULL;
1319 struct btrfs_root *root; 1325 struct btrfs_root *root;
1320 struct btrfs_block_rsv *block_rsv; 1326 struct btrfs_block_rsv *block_rsv;
1321 int need_requeue = 0; 1327 int total_done = 0;
1322 1328
1323 async_node = container_of(work, struct btrfs_async_delayed_node, work); 1329 async_work = container_of(work, struct btrfs_async_delayed_work, work);
1330 delayed_root = async_work->delayed_root;
1324 1331
1325 path = btrfs_alloc_path(); 1332 path = btrfs_alloc_path();
1326 if (!path) 1333 if (!path)
1327 goto out; 1334 goto out;
1328 path->leave_spinning = 1;
1329 1335
1330 delayed_node = async_node->delayed_node; 1336again:
1337 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1338 goto free_path;
1339
1340 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1341 if (!delayed_node)
1342 goto free_path;
1343
1344 path->leave_spinning = 1;
1331 root = delayed_node->root; 1345 root = delayed_node->root;
1332 1346
1333 trans = btrfs_join_transaction(root); 1347 trans = btrfs_join_transaction(root);
1334 if (IS_ERR(trans)) 1348 if (IS_ERR(trans))
1335 goto free_path; 1349 goto release_path;
1336 1350
1337 block_rsv = trans->block_rsv; 1351 block_rsv = trans->block_rsv;
1338 trans->block_rsv = &root->fs_info->delayed_block_rsv; 1352 trans->block_rsv = &root->fs_info->delayed_block_rsv;
@@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1363 * Task1 will sleep until the transaction is commited. 1377 * Task1 will sleep until the transaction is commited.
1364 */ 1378 */
1365 mutex_lock(&delayed_node->mutex); 1379 mutex_lock(&delayed_node->mutex);
1366 if (delayed_node->count) 1380 btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
1367 need_requeue = 1;
1368 else
1369 btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1370 delayed_node);
1371 mutex_unlock(&delayed_node->mutex); 1381 mutex_unlock(&delayed_node->mutex);
1372 1382
1373 trans->block_rsv = block_rsv; 1383 trans->block_rsv = block_rsv;
1374 btrfs_end_transaction_dmeta(trans, root); 1384 btrfs_end_transaction_dmeta(trans, root);
1375 btrfs_btree_balance_dirty_nodelay(root); 1385 btrfs_btree_balance_dirty_nodelay(root);
1386
1387release_path:
1388 btrfs_release_path(path);
1389 total_done++;
1390
1391 btrfs_release_prepared_delayed_node(delayed_node);
1392 if (async_work->nr == 0 || total_done < async_work->nr)
1393 goto again;
1394
1376free_path: 1395free_path:
1377 btrfs_free_path(path); 1396 btrfs_free_path(path);
1378out: 1397out:
1379 if (need_requeue) 1398 wake_up(&delayed_root->wait);
1380 btrfs_requeue_work(&async_node->work); 1399 kfree(async_work);
1381 else {
1382 btrfs_release_prepared_delayed_node(delayed_node);
1383 kfree(async_node);
1384 }
1385} 1400}
1386 1401
1402
1387static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, 1403static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1388 struct btrfs_root *root, int all) 1404 struct btrfs_root *root, int nr)
1389{ 1405{
1390 struct btrfs_async_delayed_node *async_node; 1406 struct btrfs_async_delayed_work *async_work;
1391 struct btrfs_delayed_node *curr;
1392 int count = 0;
1393 1407
1394again: 1408 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1395 curr = btrfs_first_prepared_delayed_node(delayed_root);
1396 if (!curr)
1397 return 0; 1409 return 0;
1398 1410
1399 async_node = kmalloc(sizeof(*async_node), GFP_NOFS); 1411 async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1400 if (!async_node) { 1412 if (!async_work)
1401 btrfs_release_prepared_delayed_node(curr);
1402 return -ENOMEM; 1413 return -ENOMEM;
1403 }
1404
1405 async_node->root = root;
1406 async_node->delayed_node = curr;
1407
1408 async_node->work.func = btrfs_async_run_delayed_node_done;
1409 async_node->work.flags = 0;
1410 1414
1411 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work); 1415 async_work->delayed_root = delayed_root;
1412 count++; 1416 async_work->work.func = btrfs_async_run_delayed_root;
1413 1417 async_work->work.flags = 0;
1414 if (all || count < 4) 1418 async_work->nr = nr;
1415 goto again;
1416 1419
1420 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
1417 return 0; 1421 return 0;
1418} 1422}
1419 1423
@@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1424 WARN_ON(btrfs_first_delayed_node(delayed_root)); 1428 WARN_ON(btrfs_first_delayed_node(delayed_root));
1425} 1429}
1426 1430
1431static int refs_newer(struct btrfs_delayed_root *delayed_root,
1432 int seq, int count)
1433{
1434 int val = atomic_read(&delayed_root->items_seq);
1435
1436 if (val < seq || val >= seq + count)
1437 return 1;
1438 return 0;
1439}
1440
1427void btrfs_balance_delayed_items(struct btrfs_root *root) 1441void btrfs_balance_delayed_items(struct btrfs_root *root)
1428{ 1442{
1429 struct btrfs_delayed_root *delayed_root; 1443 struct btrfs_delayed_root *delayed_root;
1444 int seq;
1430 1445
1431 delayed_root = btrfs_get_delayed_root(root); 1446 delayed_root = btrfs_get_delayed_root(root);
1432 1447
1433 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) 1448 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1434 return; 1449 return;
1435 1450
1451 seq = atomic_read(&delayed_root->items_seq);
1452
1436 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { 1453 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1437 int ret; 1454 int ret;
1438 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1); 1455 DEFINE_WAIT(__wait);
1456
1457 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1439 if (ret) 1458 if (ret)
1440 return; 1459 return;
1441 1460
1442 wait_event_interruptible_timeout( 1461 while (1) {
1443 delayed_root->wait, 1462 prepare_to_wait(&delayed_root->wait, &__wait,
1444 (atomic_read(&delayed_root->items) < 1463 TASK_INTERRUPTIBLE);
1445 BTRFS_DELAYED_BACKGROUND), 1464
1446 HZ); 1465 if (refs_newer(delayed_root, seq,
1447 return; 1466 BTRFS_DELAYED_BATCH) ||
1467 atomic_read(&delayed_root->items) <
1468 BTRFS_DELAYED_BACKGROUND) {
1469 break;
1470 }
1471 if (!signal_pending(current))
1472 schedule();
1473 else
1474 break;
1475 }
1476 finish_wait(&delayed_root->wait, &__wait);
1448 } 1477 }
1449 1478
1450 btrfs_wq_run_delayed_node(delayed_root, root, 0); 1479 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
1451} 1480}
1452 1481
1453/* Will return 0 or -ENOMEM */ 1482/* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 78b6ad0fc669..1d5c5f7abe3e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -43,6 +43,7 @@ struct btrfs_delayed_root {
43 */ 43 */
44 struct list_head prepare_list; 44 struct list_head prepare_list;
45 atomic_t items; /* for delayed items */ 45 atomic_t items; /* for delayed items */
46 atomic_t items_seq; /* for delayed items */
46 int nodes; /* for delayed nodes */ 47 int nodes; /* for delayed nodes */
47 wait_queue_head_t wait; 48 wait_queue_head_t wait;
48}; 49};
@@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root(
86 struct btrfs_delayed_root *delayed_root) 87 struct btrfs_delayed_root *delayed_root)
87{ 88{
88 atomic_set(&delayed_root->items, 0); 89 atomic_set(&delayed_root->items, 0);
90 atomic_set(&delayed_root->items_seq, 0);
89 delayed_root->nodes = 0; 91 delayed_root->nodes = 0;
90 spin_lock_init(&delayed_root->lock); 92 spin_lock_init(&delayed_root->lock);
91 init_waitqueue_head(&delayed_root->wait); 93 init_waitqueue_head(&delayed_root->wait);