diff options
author | Dave Chinner <dchinner@redhat.com> | 2011-04-07 22:45:07 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2011-04-07 22:45:07 -0400 |
commit | 89e4cb550a492cfca038a555fcc1bdac58822ec3 (patch) | |
tree | ab688a1849d6361c92b9f60ae0586045908010da | |
parent | c6d09b666de11eb272326a6eb6cd3246da571014 (diff) |
xfs: convert ENOSPC inode flushing to use new syncd workqueue
One of the problems with the current inode flush at ENOSPC is that we
queue a flush per ENOSPC event, regardless of how many are already
queued. This can result in hundreds of queued flushes, most of
which simply burn CPU scanning and do no real work. This simply slows
down allocation at ENOSPC.
We really only need one active flush at a time, and we can easily
implement that via the new xfs_syncd_wq. All we need to do is queue
a flush if one is not already active, then block waiting for the
currently active flush to complete. The result is that we only ever
have a single ENOSPC inode flush active at a time and this greatly
reduces the overhead of ENOSPC processing.
On my 2p test machine, this results in tests exercising ENOSPC
conditions running significantly faster - 042 halves execution time,
083 drops from 60s to 5s, etc - while not introducing test
regressions.
This allows us to remove the old xfssyncd threads and infrastructure
as they are no longer used.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 132 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 4 |
3 files changed, 36 insertions, 102 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c71b6ed45e41..ee0e981aa9d1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1480,8 +1480,6 @@ xfs_fs_fill_super( | |||
1480 | spin_lock_init(&mp->m_sb_lock); | 1480 | spin_lock_init(&mp->m_sb_lock); |
1481 | mutex_init(&mp->m_growlock); | 1481 | mutex_init(&mp->m_growlock); |
1482 | atomic_set(&mp->m_active_trans, 0); | 1482 | atomic_set(&mp->m_active_trans, 0); |
1483 | INIT_LIST_HEAD(&mp->m_sync_list); | ||
1484 | spin_lock_init(&mp->m_sync_lock); | ||
1485 | 1483 | ||
1486 | mp->m_super = sb; | 1484 | mp->m_super = sb; |
1487 | sb->s_fs_info = mp; | 1485 | sb->s_fs_info = mp; |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 4a582d8100e4..af3275965c77 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -433,99 +433,6 @@ xfs_quiesce_attr( | |||
433 | xfs_unmountfs_writesb(mp); | 433 | xfs_unmountfs_writesb(mp); |
434 | } | 434 | } |
435 | 435 | ||
436 | /* | ||
437 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | ||
438 | * Doing this has two advantages: | ||
439 | * - It saves on stack space, which is tight in certain situations | ||
440 | * - It can be used (with care) as a mechanism to avoid deadlocks. | ||
441 | * Flushing while allocating in a full filesystem requires both. | ||
442 | */ | ||
443 | STATIC void | ||
444 | xfs_syncd_queue_work( | ||
445 | struct xfs_mount *mp, | ||
446 | void *data, | ||
447 | void (*syncer)(struct xfs_mount *, void *), | ||
448 | struct completion *completion) | ||
449 | { | ||
450 | struct xfs_sync_work *work; | ||
451 | |||
452 | work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); | ||
453 | INIT_LIST_HEAD(&work->w_list); | ||
454 | work->w_syncer = syncer; | ||
455 | work->w_data = data; | ||
456 | work->w_mount = mp; | ||
457 | work->w_completion = completion; | ||
458 | spin_lock(&mp->m_sync_lock); | ||
459 | list_add_tail(&work->w_list, &mp->m_sync_list); | ||
460 | spin_unlock(&mp->m_sync_lock); | ||
461 | wake_up_process(mp->m_sync_task); | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * Flush delayed allocate data, attempting to free up reserved space | ||
466 | * from existing allocations. At this point a new allocation attempt | ||
467 | * has failed with ENOSPC and we are in the process of scratching our | ||
468 | * heads, looking about for more room... | ||
469 | */ | ||
470 | STATIC void | ||
471 | xfs_flush_inodes_work( | ||
472 | struct xfs_mount *mp, | ||
473 | void *arg) | ||
474 | { | ||
475 | struct inode *inode = arg; | ||
476 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
477 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
478 | iput(inode); | ||
479 | } | ||
480 | |||
481 | void | ||
482 | xfs_flush_inodes( | ||
483 | xfs_inode_t *ip) | ||
484 | { | ||
485 | struct inode *inode = VFS_I(ip); | ||
486 | DECLARE_COMPLETION_ONSTACK(completion); | ||
487 | |||
488 | igrab(inode); | ||
489 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); | ||
490 | wait_for_completion(&completion); | ||
491 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); | ||
492 | } | ||
493 | |||
494 | STATIC int | ||
495 | xfssyncd( | ||
496 | void *arg) | ||
497 | { | ||
498 | struct xfs_mount *mp = arg; | ||
499 | long timeleft; | ||
500 | xfs_sync_work_t *work, *n; | ||
501 | LIST_HEAD (tmp); | ||
502 | |||
503 | set_freezable(); | ||
504 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | ||
505 | for (;;) { | ||
506 | if (list_empty(&mp->m_sync_list)) | ||
507 | schedule_timeout_interruptible(timeleft); | ||
508 | /* swsusp */ | ||
509 | try_to_freeze(); | ||
510 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) | ||
511 | break; | ||
512 | |||
513 | spin_lock(&mp->m_sync_lock); | ||
514 | list_splice_init(&mp->m_sync_list, &tmp); | ||
515 | spin_unlock(&mp->m_sync_lock); | ||
516 | |||
517 | list_for_each_entry_safe(work, n, &tmp, w_list) { | ||
518 | (*work->w_syncer)(mp, work->w_data); | ||
519 | list_del(&work->w_list); | ||
520 | if (work->w_completion) | ||
521 | complete(work->w_completion); | ||
522 | kmem_free(work); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | return 0; | ||
527 | } | ||
528 | |||
529 | static void | 436 | static void |
530 | xfs_syncd_queue_sync( | 437 | xfs_syncd_queue_sync( |
531 | struct xfs_mount *mp) | 438 | struct xfs_mount *mp) |
@@ -562,16 +469,47 @@ xfs_sync_worker( | |||
562 | xfs_syncd_queue_sync(mp); | 469 | xfs_syncd_queue_sync(mp); |
563 | } | 470 | } |
564 | 471 | ||
472 | /* | ||
473 | * Flush delayed allocate data, attempting to free up reserved space | ||
474 | * from existing allocations. At this point a new allocation attempt | ||
475 | * has failed with ENOSPC and we are in the process of scratching our | ||
476 | * heads, looking about for more room. | ||
477 | * | ||
478 | * Queue a new data flush if there isn't one already in progress and | ||
479 | * wait for completion of the flush. This means that we only ever have one | ||
480 | * inode flush in progress no matter how many ENOSPC events are occurring and | ||
481 | * so will prevent the system from bogging down due to every concurrent | ||
482 | * ENOSPC event scanning all the active inodes in the system for writeback. | ||
483 | */ | ||
484 | void | ||
485 | xfs_flush_inodes( | ||
486 | struct xfs_inode *ip) | ||
487 | { | ||
488 | struct xfs_mount *mp = ip->i_mount; | ||
489 | |||
490 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | ||
491 | flush_work_sync(&mp->m_flush_work); | ||
492 | } | ||
493 | |||
494 | STATIC void | ||
495 | xfs_flush_worker( | ||
496 | struct work_struct *work) | ||
497 | { | ||
498 | struct xfs_mount *mp = container_of(work, | ||
499 | struct xfs_mount, m_flush_work); | ||
500 | |||
501 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
502 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
503 | } | ||
504 | |||
565 | int | 505 | int |
566 | xfs_syncd_init( | 506 | xfs_syncd_init( |
567 | struct xfs_mount *mp) | 507 | struct xfs_mount *mp) |
568 | { | 508 | { |
509 | INIT_WORK(&mp->m_flush_work, xfs_flush_worker); | ||
569 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); | 510 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); |
570 | xfs_syncd_queue_sync(mp); | 511 | xfs_syncd_queue_sync(mp); |
571 | 512 | ||
572 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); | ||
573 | if (IS_ERR(mp->m_sync_task)) | ||
574 | return -PTR_ERR(mp->m_sync_task); | ||
575 | return 0; | 513 | return 0; |
576 | } | 514 | } |
577 | 515 | ||
@@ -580,7 +518,7 @@ xfs_syncd_stop( | |||
580 | struct xfs_mount *mp) | 518 | struct xfs_mount *mp) |
581 | { | 519 | { |
582 | cancel_delayed_work_sync(&mp->m_sync_work); | 520 | cancel_delayed_work_sync(&mp->m_sync_work); |
583 | kthread_stop(mp->m_sync_task); | 521 | cancel_work_sync(&mp->m_flush_work); |
584 | } | 522 | } |
585 | 523 | ||
586 | void | 524 | void |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2c11e62be888..a0ad90e95299 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -204,9 +204,7 @@ typedef struct xfs_mount { | |||
204 | #endif | 204 | #endif |
205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
206 | struct delayed_work m_sync_work; /* background sync work */ | 206 | struct delayed_work m_sync_work; /* background sync work */ |
207 | struct task_struct *m_sync_task; /* generalised sync thread */ | 207 | struct work_struct m_flush_work; /* background inode flush */ |
208 | struct list_head m_sync_list; /* sync thread work item list */ | ||
209 | spinlock_t m_sync_lock; /* work item list lock */ | ||
210 | __int64_t m_update_flags; /* sb flags we need to update | 208 | __int64_t m_update_flags; /* sb flags we need to update |
211 | on the next remount,rw */ | 209 | on the next remount,rw */ |
212 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 210 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |