author	Dave Chinner <dchinner@redhat.com>	2011-04-07 22:45:07 -0400
committer	Dave Chinner <david@fromorbit.com>	2011-04-07 22:45:07 -0400
commit	89e4cb550a492cfca038a555fcc1bdac58822ec3 (patch)
tree	ab688a1849d6361c92b9f60ae0586045908010da
parent	c6d09b666de11eb272326a6eb6cd3246da571014 (diff)
xfs: convert ENOSPC inode flushing to use new syncd workqueue
One of the problems with the current inode flush at ENOSPC is that we queue a flush per ENOSPC event, regardless of how many are already queued. This can result in hundreds of queued flushes, most of which simply burn CPU scanning and do no real work. This simply slows down allocation at ENOSPC.

We really only need one active flush at a time, and we can easily implement that via the new xfs_syncd_wq. All we need to do is queue a flush if one is not already active, then block waiting for the currently active flush to complete. The result is that we only ever have a single ENOSPC inode flush active at a time and this greatly reduces the overhead of ENOSPC processing.

On my 2p test machine, this results in tests exercising ENOSPC conditions running significantly faster - 042 halves execution time, 083 drops from 60s to 5s, etc. - while not introducing test regressions.

This allows us to remove the old xfssyncd threads and infrastructure as they are no longer used.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
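The serialisation the message describes comes straight from the workqueue API: queue_work() does nothing if the work item is already pending, and flush_work_sync() blocks the caller until the queued or currently running instance has finished. A minimal, self-contained sketch of that pattern, assuming a 2.6.38-era kernel (the demo_* names and the module scaffolding are illustrative, not part of this patch):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/delay.h>

static struct workqueue_struct *demo_wq;

/* Illustrative stand-in for the expensive inode scan and writeback. */
static void demo_flush_worker(struct work_struct *work)
{
	pr_info("flush pass running\n");
	msleep(100);
}

static DECLARE_WORK(demo_flush_work, demo_flush_worker);

/*
 * Called on every simulated ENOSPC event. queue_work() only queues the
 * item if it is not already pending, so however many callers race here,
 * at most one flush pass is queued or running at any time; every caller
 * then blocks until that single pass completes.
 */
static void demo_flush_on_enospc(void)
{
	queue_work(demo_wq, &demo_flush_work);
	flush_work_sync(&demo_flush_work);	/* plain flush_work() on kernels >= 3.7 */
}

static int __init demo_init(void)
{
	demo_wq = alloc_workqueue("demo_flush", 0, 0);
	if (!demo_wq)
		return -ENOMEM;
	demo_flush_on_enospc();
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_work_sync(&demo_flush_work);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

This is also why the new xfs_flush_inodes() in the patch below needs no completion of its own: flushing the work item is the wait.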
-rw-r--r--	fs/xfs/linux-2.6/xfs_super.c	  2
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	132
-rw-r--r--	fs/xfs/xfs_mount.h	  4
3 files changed, 36 insertions(+), 102 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c71b6ed45e41..ee0e981aa9d1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1480,8 +1480,6 @@ xfs_fs_fill_super(
 	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
-	INIT_LIST_HEAD(&mp->m_sync_list);
-	spin_lock_init(&mp->m_sync_lock);
 
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 4a582d8100e4..af3275965c77 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -433,99 +433,6 @@ xfs_quiesce_attr(
 	xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-	struct xfs_mount *mp,
-	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *),
-	struct completion *completion)
-{
-	struct xfs_sync_work *work;
-
-	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-	INIT_LIST_HEAD(&work->w_list);
-	work->w_syncer = syncer;
-	work->w_data = data;
-	work->w_mount = mp;
-	work->w_completion = completion;
-	spin_lock(&mp->m_sync_lock);
-	list_add_tail(&work->w_list, &mp->m_sync_list);
-	spin_unlock(&mp->m_sync_lock);
-	wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-	iput(inode);
-}
-
-void
-xfs_flush_inodes(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-	DECLARE_COMPLETION_ONSTACK(completion);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
-}
-
-STATIC int
-xfssyncd(
-	void			*arg)
-{
-	struct xfs_mount	*mp = arg;
-	long			timeleft;
-	xfs_sync_work_t		*work, *n;
-	LIST_HEAD		(tmp);
-
-	set_freezable();
-	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-	for (;;) {
-		if (list_empty(&mp->m_sync_list))
-			schedule_timeout_interruptible(timeleft);
-		/* swsusp */
-		try_to_freeze();
-		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-			break;
-
-		spin_lock(&mp->m_sync_lock);
-		list_splice_init(&mp->m_sync_list, &tmp);
-		spin_unlock(&mp->m_sync_lock);
-
-		list_for_each_entry_safe(work, n, &tmp, w_list) {
-			(*work->w_syncer)(mp, work->w_data);
-			list_del(&work->w_list);
-			if (work->w_completion)
-				complete(work->w_completion);
-			kmem_free(work);
-		}
-	}
-
-	return 0;
-}
-
 static void
 xfs_syncd_queue_sync(
 	struct xfs_mount	*mp)
@@ -562,16 +469,47 @@ xfs_sync_worker(
 	xfs_syncd_queue_sync(mp);
 }
 
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+	flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(work,
+					struct xfs_mount, m_flush_work);
+
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
 int
 xfs_syncd_init(
 	struct xfs_mount	*mp)
 {
+	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
 	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
 	xfs_syncd_queue_sync(mp);
 
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-	if (IS_ERR(mp->m_sync_task))
-		return -PTR_ERR(mp->m_sync_task);
 	return 0;
 }
 
@@ -580,7 +518,7 @@ xfs_syncd_stop(
 	struct xfs_mount	*mp)
 {
 	cancel_delayed_work_sync(&mp->m_sync_work);
-	kthread_stop(mp->m_sync_task);
+	cancel_work_sync(&mp->m_flush_work);
 }
 
 void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 2c11e62be888..a0ad90e95299 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -204,9 +204,7 @@ typedef struct xfs_mount {
 #endif
 	struct xfs_mru_cache	*m_filestream;	/* per-mount filestream data */
 	struct delayed_work	m_sync_work;	/* background sync work */
-	struct task_struct	*m_sync_task;	/* generalised sync thread */
-	struct list_head	m_sync_list;	/* sync thread work item list */
-	spinlock_t		m_sync_lock;	/* work item list lock */
+	struct work_struct	m_flush_work;	/* background inode flush */
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */