aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2008-02-04 20:13:32 -0500
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-02-07 02:22:51 -0500
commit249a8c1124653fa90f3a3afff869095a31bc229f (patch)
treee0681990b5b61155a64a9fd3c0cf73d4d6bb4ce5 /fs/xfs
parent4576758db5817a91b8974c696247d459dc653db2 (diff)
[XFS] Move AIL pushing into it's own thread
When many hundreds to thousands of threads all try to do simultaneous transactions and the log is in a tail-pushing situation (i.e. full), we can get multiple threads walking the AIL list and contending on the AIL lock. The AIL push is, in effect, a simple I/O dispatch algorithm complicated by the ordering constraints placed on it by the transaction subsystem. It really does not need multiple threads to push on it - even when only a single CPU is pushing the AIL, it can push the I/O out far faster that pretty much any disk subsystem can handle. So, to avoid contention problems stemming from multiple list walkers, move the list walk off into another thread and simply provide a "target" to push to. When a thread requires a push, it sets the target and wakes the push thread, then goes to sleep waiting for the required amount of space to become available in the log. This mechanism should also be a lot fairer under heavy load as the waiters will queue in arrival order, rather than queuing in "who completed a push first" order. Also, by moving the pushing to a separate thread we can do more effectively overload detection and prevention as we can keep context from loop iteration to loop iteration. That is, we can push only part of the list each loop and not have to loop back to the start of the list every time we run. This should also help by reducing the number of items we try to lock and/or push items that we cannot move. Note that this patch is not intended to solve the inefficiencies in the AIL structure and the associated issues with extremely large list contents. That needs to be addresses separately; parallel access would cause problems to any new structure as well, so I'm only aiming to isolate the structure from unbounded parallelism here. SGI-PV: 972759 SGI-Modid: xfs-linux-melb:xfs-kern:30371a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c59
-rw-r--r--fs/xfs/xfs_log.c33
-rw-r--r--fs/xfs/xfs_mount.c6
-rw-r--r--fs/xfs/xfs_mount.h10
-rw-r--r--fs/xfs/xfs_trans.h5
-rw-r--r--fs/xfs/xfs_trans_ail.c269
-rw-r--r--fs/xfs/xfs_trans_priv.h8
7 files changed, 282 insertions, 108 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 70024a24692d..156a1a7da16b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -51,6 +51,7 @@
51#include "xfs_vfsops.h" 51#include "xfs_vfsops.h"
52#include "xfs_version.h" 52#include "xfs_version.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_trans_priv.h"
54 55
55#include <linux/namei.h> 56#include <linux/namei.h>
56#include <linux/init.h> 57#include <linux/init.h>
@@ -765,6 +766,64 @@ xfs_blkdev_issue_flush(
765 blkdev_issue_flush(buftarg->bt_bdev, NULL); 766 blkdev_issue_flush(buftarg->bt_bdev, NULL);
766} 767}
767 768
769/*
770 * XFS AIL push thread support
771 */
772void
773xfsaild_wakeup(
774 xfs_mount_t *mp,
775 xfs_lsn_t threshold_lsn)
776{
777 mp->m_ail.xa_target = threshold_lsn;
778 wake_up_process(mp->m_ail.xa_task);
779}
780
781int
782xfsaild(
783 void *data)
784{
785 xfs_mount_t *mp = (xfs_mount_t *)data;
786 xfs_lsn_t last_pushed_lsn = 0;
787 long tout = 0;
788
789 while (!kthread_should_stop()) {
790 if (tout)
791 schedule_timeout_interruptible(msecs_to_jiffies(tout));
792 tout = 1000;
793
794 /* swsusp */
795 try_to_freeze();
796
797 ASSERT(mp->m_log);
798 if (XFS_FORCED_SHUTDOWN(mp))
799 continue;
800
801 tout = xfsaild_push(mp, &last_pushed_lsn);
802 }
803
804 return 0;
805} /* xfsaild */
806
807int
808xfsaild_start(
809 xfs_mount_t *mp)
810{
811 mp->m_ail.xa_target = 0;
812 mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
813 if (IS_ERR(mp->m_ail.xa_task))
814 return -PTR_ERR(mp->m_ail.xa_task);
815 return 0;
816}
817
818void
819xfsaild_stop(
820 xfs_mount_t *mp)
821{
822 kthread_stop(mp->m_ail.xa_task);
823}
824
825
826
768STATIC struct inode * 827STATIC struct inode *
769xfs_fs_alloc_inode( 828xfs_fs_alloc_inode(
770 struct super_block *sb) 829 struct super_block *sb)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4c86a26330a0..b3ac3805d3c4 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -498,11 +498,14 @@ xfs_log_reserve(xfs_mount_t *mp,
498 * Return error or zero. 498 * Return error or zero.
499 */ 499 */
500int 500int
501xfs_log_mount(xfs_mount_t *mp, 501xfs_log_mount(
502 xfs_buftarg_t *log_target, 502 xfs_mount_t *mp,
503 xfs_daddr_t blk_offset, 503 xfs_buftarg_t *log_target,
504 int num_bblks) 504 xfs_daddr_t blk_offset,
505 int num_bblks)
505{ 506{
507 int error;
508
506 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 509 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
507 cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); 510 cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname);
508 else { 511 else {
@@ -515,11 +518,21 @@ xfs_log_mount(xfs_mount_t *mp,
515 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); 518 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
516 519
517 /* 520 /*
521 * Initialize the AIL now we have a log.
522 */
523 spin_lock_init(&mp->m_ail_lock);
524 error = xfs_trans_ail_init(mp);
525 if (error) {
526 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
527 goto error;
528 }
529
530 /*
518 * skip log recovery on a norecovery mount. pretend it all 531 * skip log recovery on a norecovery mount. pretend it all
519 * just worked. 532 * just worked.
520 */ 533 */
521 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { 534 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
522 int error, readonly = (mp->m_flags & XFS_MOUNT_RDONLY); 535 int readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
523 536
524 if (readonly) 537 if (readonly)
525 mp->m_flags &= ~XFS_MOUNT_RDONLY; 538 mp->m_flags &= ~XFS_MOUNT_RDONLY;
@@ -530,8 +543,7 @@ xfs_log_mount(xfs_mount_t *mp,
530 mp->m_flags |= XFS_MOUNT_RDONLY; 543 mp->m_flags |= XFS_MOUNT_RDONLY;
531 if (error) { 544 if (error) {
532 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); 545 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
533 xlog_dealloc_log(mp->m_log); 546 goto error;
534 return error;
535 } 547 }
536 } 548 }
537 549
@@ -540,6 +552,9 @@ xfs_log_mount(xfs_mount_t *mp,
540 552
541 /* End mounting message in xfs_log_mount_finish */ 553 /* End mounting message in xfs_log_mount_finish */
542 return 0; 554 return 0;
555error:
556 xfs_log_unmount_dealloc(mp);
557 return error;
543} /* xfs_log_mount */ 558} /* xfs_log_mount */
544 559
545/* 560/*
@@ -722,10 +737,14 @@ xfs_log_unmount_write(xfs_mount_t *mp)
722 737
723/* 738/*
724 * Deallocate log structures for unmount/relocation. 739 * Deallocate log structures for unmount/relocation.
740 *
741 * We need to stop the aild from running before we destroy
742 * and deallocate the log as the aild references the log.
725 */ 743 */
726void 744void
727xfs_log_unmount_dealloc(xfs_mount_t *mp) 745xfs_log_unmount_dealloc(xfs_mount_t *mp)
728{ 746{
747 xfs_trans_ail_destroy(mp);
729 xlog_dealloc_log(mp->m_log); 748 xlog_dealloc_log(mp->m_log);
730} 749}
731 750
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 766c01670b2c..6409b3762995 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -136,15 +136,9 @@ xfs_mount_init(void)
136 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 136 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
137 } 137 }
138 138
139 spin_lock_init(&mp->m_ail_lock);
140 spin_lock_init(&mp->m_sb_lock); 139 spin_lock_init(&mp->m_sb_lock);
141 mutex_init(&mp->m_ilock); 140 mutex_init(&mp->m_ilock);
142 mutex_init(&mp->m_growlock); 141 mutex_init(&mp->m_growlock);
143 /*
144 * Initialize the AIL.
145 */
146 xfs_trans_ail_init(mp);
147
148 atomic_set(&mp->m_active_trans, 0); 142 atomic_set(&mp->m_active_trans, 0);
149 143
150 return mp; 144 return mp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 15025e01300a..f7c620ec6e69 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,12 +219,18 @@ extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
219#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) 219#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0)
220#endif 220#endif
221 221
222typedef struct xfs_ail {
223 xfs_ail_entry_t xa_ail;
224 uint xa_gen;
225 struct task_struct *xa_task;
226 xfs_lsn_t xa_target;
227} xfs_ail_t;
228
222typedef struct xfs_mount { 229typedef struct xfs_mount {
223 struct super_block *m_super; 230 struct super_block *m_super;
224 xfs_tid_t m_tid; /* next unused tid for fs */ 231 xfs_tid_t m_tid; /* next unused tid for fs */
225 spinlock_t m_ail_lock; /* fs AIL mutex */ 232 spinlock_t m_ail_lock; /* fs AIL mutex */
226 xfs_ail_entry_t m_ail; /* fs active log item list */ 233 xfs_ail_t m_ail; /* fs active log item list */
227 uint m_ail_gen; /* fs AIL generation count */
228 xfs_sb_t m_sb; /* copy of fs superblock */ 234 xfs_sb_t m_sb; /* copy of fs superblock */
229 spinlock_t m_sb_lock; /* sb counter lock */ 235 spinlock_t m_sb_lock; /* sb counter lock */
230 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 236 struct xfs_buf *m_sb_bp; /* buffer for superblock */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bea6e2b95a90..7f40628d85c7 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -992,8 +992,9 @@ int _xfs_trans_commit(xfs_trans_t *,
992 int *); 992 int *);
993#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) 993#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
994void xfs_trans_cancel(xfs_trans_t *, int); 994void xfs_trans_cancel(xfs_trans_t *, int);
995void xfs_trans_ail_init(struct xfs_mount *); 995int xfs_trans_ail_init(struct xfs_mount *);
996xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); 996void xfs_trans_ail_destroy(struct xfs_mount *);
997void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
997xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); 998xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *);
998void xfs_trans_unlocked_item(struct xfs_mount *, 999void xfs_trans_unlocked_item(struct xfs_mount *,
999 xfs_log_item_t *); 1000 xfs_log_item_t *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2d3c297259c2..7ed7425478de 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -57,7 +57,7 @@ xfs_trans_tail_ail(
57 xfs_log_item_t *lip; 57 xfs_log_item_t *lip;
58 58
59 spin_lock(&mp->m_ail_lock); 59 spin_lock(&mp->m_ail_lock);
60 lip = xfs_ail_min(&(mp->m_ail)); 60 lip = xfs_ail_min(&(mp->m_ail.xa_ail));
61 if (lip == NULL) { 61 if (lip == NULL) {
62 lsn = (xfs_lsn_t)0; 62 lsn = (xfs_lsn_t)0;
63 } else { 63 } else {
@@ -71,119 +71,185 @@ xfs_trans_tail_ail(
71/* 71/*
72 * xfs_trans_push_ail 72 * xfs_trans_push_ail
73 * 73 *
74 * This routine is called to move the tail of the AIL 74 * This routine is called to move the tail of the AIL forward. It does this by
75 * forward. It does this by trying to flush items in the AIL 75 * trying to flush items in the AIL whose lsns are below the given
76 * whose lsns are below the given threshold_lsn. 76 * threshold_lsn.
77 * 77 *
78 * The routine returns the lsn of the tail of the log. 78 * the push is run asynchronously in a separate thread, so we return the tail
79 * of the log right now instead of the tail after the push. This means we will
80 * either continue right away, or we will sleep waiting on the async thread to
81 * do it's work.
82 *
83 * We do this unlocked - we only need to know whether there is anything in the
84 * AIL at the time we are called. We don't need to access the contents of
85 * any of the objects, so the lock is not needed.
79 */ 86 */
80xfs_lsn_t 87void
81xfs_trans_push_ail( 88xfs_trans_push_ail(
82 xfs_mount_t *mp, 89 xfs_mount_t *mp,
83 xfs_lsn_t threshold_lsn) 90 xfs_lsn_t threshold_lsn)
84{ 91{
85 xfs_lsn_t lsn;
86 xfs_log_item_t *lip; 92 xfs_log_item_t *lip;
87 int gen;
88 int restarts;
89 int lock_result;
90 int flush_log;
91 93
92#define XFS_TRANS_PUSH_AIL_RESTARTS 1000 94 lip = xfs_ail_min(&mp->m_ail.xa_ail);
95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
97 xfsaild_wakeup(mp, threshold_lsn);
98 }
99}
100
101/*
102 * Return the item in the AIL with the current lsn.
103 * Return the current tree generation number for use
104 * in calls to xfs_trans_next_ail().
105 */
106STATIC xfs_log_item_t *
107xfs_trans_first_push_ail(
108 xfs_mount_t *mp,
109 int *gen,
110 xfs_lsn_t lsn)
111{
112 xfs_log_item_t *lip;
113
114 lip = xfs_ail_min(&(mp->m_ail.xa_ail));
115 *gen = (int)mp->m_ail.xa_gen;
116 if (lsn == 0)
117 return lip;
118
119 while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0))
120 lip = lip->li_ail.ail_forw;
121
122 return lip;
123}
124
125/*
126 * Function that does the work of pushing on the AIL
127 */
128long
129xfsaild_push(
130 xfs_mount_t *mp,
131 xfs_lsn_t *last_lsn)
132{
133 long tout = 1000; /* milliseconds */
134 xfs_lsn_t last_pushed_lsn = *last_lsn;
135 xfs_lsn_t target = mp->m_ail.xa_target;
136 xfs_lsn_t lsn;
137 xfs_log_item_t *lip;
138 int gen;
139 int restarts;
140 int flush_log, count, stuck;
141
142#define XFS_TRANS_PUSH_AIL_RESTARTS 10
93 143
94 spin_lock(&mp->m_ail_lock); 144 spin_lock(&mp->m_ail_lock);
95 lip = xfs_trans_first_ail(mp, &gen); 145 lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
96 if (lip == NULL || XFS_FORCED_SHUTDOWN(mp)) { 146 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
97 /* 147 /*
98 * Just return if the AIL is empty. 148 * AIL is empty or our push has reached the end.
99 */ 149 */
100 spin_unlock(&mp->m_ail_lock); 150 spin_unlock(&mp->m_ail_lock);
101 return (xfs_lsn_t)0; 151 last_pushed_lsn = 0;
152 goto out;
102 } 153 }
103 154
104 XFS_STATS_INC(xs_push_ail); 155 XFS_STATS_INC(xs_push_ail);
105 156
106 /* 157 /*
107 * While the item we are looking at is below the given threshold 158 * While the item we are looking at is below the given threshold
108 * try to flush it out. Make sure to limit the number of times 159 * try to flush it out. We'd like not to stop until we've at least
109 * we allow xfs_trans_next_ail() to restart scanning from the
110 * beginning of the list. We'd like not to stop until we've at least
111 * tried to push on everything in the AIL with an LSN less than 160 * tried to push on everything in the AIL with an LSN less than
112 * the given threshold. However, we may give up before that if 161 * the given threshold.
113 * we realize that we've been holding the AIL lock for 'too long', 162 *
114 * blocking interrupts. Currently, too long is < 500us roughly. 163 * However, we will stop after a certain number of pushes and wait
164 * for a reduced timeout to fire before pushing further. This
165 * prevents use from spinning when we can't do anything or there is
166 * lots of contention on the AIL lists.
115 */ 167 */
116 flush_log = 0; 168 tout = 10;
117 restarts = 0; 169 lsn = lip->li_lsn;
118 while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) && 170 flush_log = stuck = count = restarts = 0;
119 (XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) { 171 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
172 int lock_result;
120 /* 173 /*
121 * If we can lock the item without sleeping, unlock 174 * If we can lock the item without sleeping, unlock the AIL
122 * the AIL lock and flush the item. Then re-grab the 175 * lock and flush the item. Then re-grab the AIL lock so we
123 * AIL lock so we can look for the next item on the 176 * can look for the next item on the AIL. List changes are
124 * AIL. Since we unlock the AIL while we flush the 177 * handled by the AIL lookup functions internally
125 * item, the next routine may start over again at the
126 * the beginning of the list if anything has changed.
127 * That is what the generation count is for.
128 * 178 *
129 * If we can't lock the item, either its holder will flush 179 * If we can't lock the item, either its holder will flush it
130 * it or it is already being flushed or it is being relogged. 180 * or it is already being flushed or it is being relogged. In
131 * In any of these case it is being taken care of and we 181 * any of these case it is being taken care of and we can just
132 * can just skip to the next item in the list. 182 * skip to the next item in the list.
133 */ 183 */
134 lock_result = IOP_TRYLOCK(lip); 184 lock_result = IOP_TRYLOCK(lip);
185 spin_unlock(&mp->m_ail_lock);
135 switch (lock_result) { 186 switch (lock_result) {
136 case XFS_ITEM_SUCCESS: 187 case XFS_ITEM_SUCCESS:
137 spin_unlock(&mp->m_ail_lock);
138 XFS_STATS_INC(xs_push_ail_success); 188 XFS_STATS_INC(xs_push_ail_success);
139 IOP_PUSH(lip); 189 IOP_PUSH(lip);
140 spin_lock(&mp->m_ail_lock); 190 last_pushed_lsn = lsn;
141 break; 191 break;
142 192
143 case XFS_ITEM_PUSHBUF: 193 case XFS_ITEM_PUSHBUF:
144 spin_unlock(&mp->m_ail_lock);
145 XFS_STATS_INC(xs_push_ail_pushbuf); 194 XFS_STATS_INC(xs_push_ail_pushbuf);
146#ifdef XFSRACEDEBUG
147 delay_for_intr();
148 delay(300);
149#endif
150 ASSERT(lip->li_ops->iop_pushbuf);
151 ASSERT(lip);
152 IOP_PUSHBUF(lip); 195 IOP_PUSHBUF(lip);
153 spin_lock(&mp->m_ail_lock); 196 last_pushed_lsn = lsn;
154 break; 197 break;
155 198
156 case XFS_ITEM_PINNED: 199 case XFS_ITEM_PINNED:
157 XFS_STATS_INC(xs_push_ail_pinned); 200 XFS_STATS_INC(xs_push_ail_pinned);
201 stuck++;
158 flush_log = 1; 202 flush_log = 1;
159 break; 203 break;
160 204
161 case XFS_ITEM_LOCKED: 205 case XFS_ITEM_LOCKED:
162 XFS_STATS_INC(xs_push_ail_locked); 206 XFS_STATS_INC(xs_push_ail_locked);
207 last_pushed_lsn = lsn;
208 stuck++;
163 break; 209 break;
164 210
165 case XFS_ITEM_FLUSHING: 211 case XFS_ITEM_FLUSHING:
166 XFS_STATS_INC(xs_push_ail_flushing); 212 XFS_STATS_INC(xs_push_ail_flushing);
213 last_pushed_lsn = lsn;
214 stuck++;
167 break; 215 break;
168 216
169 default: 217 default:
170 ASSERT(0); 218 ASSERT(0);
171 break; 219 break;
172 } 220 }
173 221
174 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); 222 spin_lock(&mp->m_ail_lock);
175 if (lip == NULL) { 223 /* should we bother continuing? */
224 if (XFS_FORCED_SHUTDOWN(mp))
225 break;
226 ASSERT(mp->m_log);
227
228 count++;
229
230 /*
231 * Are there too many items we can't do anything with?
232 * If we we are skipping too many items because we can't flush
233 * them or they are already being flushed, we back off and
234 * given them time to complete whatever operation is being
235 * done. i.e. remove pressure from the AIL while we can't make
236 * progress so traversals don't slow down further inserts and
237 * removals to/from the AIL.
238 *
239 * The value of 100 is an arbitrary magic number based on
240 * observation.
241 */
242 if (stuck > 100)
176 break; 243 break;
177 }
178 if (XFS_FORCED_SHUTDOWN(mp)) {
179 /*
180 * Just return if we shut down during the last try.
181 */
182 spin_unlock(&mp->m_ail_lock);
183 return (xfs_lsn_t)0;
184 }
185 244
245 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
246 if (lip == NULL)
247 break;
248 if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
249 break;
250 lsn = lip->li_lsn;
186 } 251 }
252 spin_unlock(&mp->m_ail_lock);
187 253
188 if (flush_log) { 254 if (flush_log) {
189 /* 255 /*
@@ -191,22 +257,35 @@ xfs_trans_push_ail(
191 * push out the log so it will become unpinned and 257 * push out the log so it will become unpinned and
192 * move forward in the AIL. 258 * move forward in the AIL.
193 */ 259 */
194 spin_unlock(&mp->m_ail_lock);
195 XFS_STATS_INC(xs_push_ail_flush); 260 XFS_STATS_INC(xs_push_ail_flush);
196 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 261 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
197 spin_lock(&mp->m_ail_lock);
198 } 262 }
199 263
200 lip = xfs_ail_min(&(mp->m_ail)); 264 /*
201 if (lip == NULL) { 265 * We reached the target so wait a bit longer for I/O to complete and
202 lsn = (xfs_lsn_t)0; 266 * remove pushed items from the AIL before we start the next scan from
203 } else { 267 * the start of the AIL.
204 lsn = lip->li_lsn; 268 */
269 if ((XFS_LSN_CMP(lsn, target) >= 0)) {
270 tout += 20;
271 last_pushed_lsn = 0;
272 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
273 (count && ((stuck * 100) / count > 90))) {
274 /*
275 * Either there is a lot of contention on the AIL or we
276 * are stuck due to operations in progress. "Stuck" in this
277 * case is defined as >90% of the items we tried to push
278 * were stuck.
279 *
280 * Backoff a bit more to allow some I/O to complete before
281 * continuing from where we were.
282 */
283 tout += 10;
205 } 284 }
206 285out:
207 spin_unlock(&mp->m_ail_lock); 286 *last_lsn = last_pushed_lsn;
208 return lsn; 287 return tout;
209} /* xfs_trans_push_ail */ 288} /* xfsaild_push */
210 289
211 290
212/* 291/*
@@ -247,7 +326,7 @@ xfs_trans_unlocked_item(
247 * the call to xfs_log_move_tail() doesn't do anything if there's 326 * the call to xfs_log_move_tail() doesn't do anything if there's
248 * not enough free space to wake people up so we're safe calling it. 327 * not enough free space to wake people up so we're safe calling it.
249 */ 328 */
250 min_lip = xfs_ail_min(&mp->m_ail); 329 min_lip = xfs_ail_min(&mp->m_ail.xa_ail);
251 330
252 if (min_lip == lip) 331 if (min_lip == lip)
253 xfs_log_move_tail(mp, 1); 332 xfs_log_move_tail(mp, 1);
@@ -279,7 +358,7 @@ xfs_trans_update_ail(
279 xfs_log_item_t *dlip=NULL; 358 xfs_log_item_t *dlip=NULL;
280 xfs_log_item_t *mlip; /* ptr to minimum lip */ 359 xfs_log_item_t *mlip; /* ptr to minimum lip */
281 360
282 ailp = &(mp->m_ail); 361 ailp = &(mp->m_ail.xa_ail);
283 mlip = xfs_ail_min(ailp); 362 mlip = xfs_ail_min(ailp);
284 363
285 if (lip->li_flags & XFS_LI_IN_AIL) { 364 if (lip->li_flags & XFS_LI_IN_AIL) {
@@ -292,10 +371,10 @@ xfs_trans_update_ail(
292 lip->li_lsn = lsn; 371 lip->li_lsn = lsn;
293 372
294 xfs_ail_insert(ailp, lip); 373 xfs_ail_insert(ailp, lip);
295 mp->m_ail_gen++; 374 mp->m_ail.xa_gen++;
296 375
297 if (mlip == dlip) { 376 if (mlip == dlip) {
298 mlip = xfs_ail_min(&(mp->m_ail)); 377 mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
299 spin_unlock(&mp->m_ail_lock); 378 spin_unlock(&mp->m_ail_lock);
300 xfs_log_move_tail(mp, mlip->li_lsn); 379 xfs_log_move_tail(mp, mlip->li_lsn);
301 } else { 380 } else {
@@ -330,7 +409,7 @@ xfs_trans_delete_ail(
330 xfs_log_item_t *mlip; 409 xfs_log_item_t *mlip;
331 410
332 if (lip->li_flags & XFS_LI_IN_AIL) { 411 if (lip->li_flags & XFS_LI_IN_AIL) {
333 ailp = &(mp->m_ail); 412 ailp = &(mp->m_ail.xa_ail);
334 mlip = xfs_ail_min(ailp); 413 mlip = xfs_ail_min(ailp);
335 dlip = xfs_ail_delete(ailp, lip); 414 dlip = xfs_ail_delete(ailp, lip);
336 ASSERT(dlip == lip); 415 ASSERT(dlip == lip);
@@ -338,10 +417,10 @@ xfs_trans_delete_ail(
338 417
339 lip->li_flags &= ~XFS_LI_IN_AIL; 418 lip->li_flags &= ~XFS_LI_IN_AIL;
340 lip->li_lsn = 0; 419 lip->li_lsn = 0;
341 mp->m_ail_gen++; 420 mp->m_ail.xa_gen++;
342 421
343 if (mlip == dlip) { 422 if (mlip == dlip) {
344 mlip = xfs_ail_min(&(mp->m_ail)); 423 mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
345 spin_unlock(&mp->m_ail_lock); 424 spin_unlock(&mp->m_ail_lock);
346 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); 425 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
347 } else { 426 } else {
@@ -379,10 +458,10 @@ xfs_trans_first_ail(
379{ 458{
380 xfs_log_item_t *lip; 459 xfs_log_item_t *lip;
381 460
382 lip = xfs_ail_min(&(mp->m_ail)); 461 lip = xfs_ail_min(&(mp->m_ail.xa_ail));
383 *gen = (int)mp->m_ail_gen; 462 *gen = (int)mp->m_ail.xa_gen;
384 463
385 return (lip); 464 return lip;
386} 465}
387 466
388/* 467/*
@@ -402,11 +481,11 @@ xfs_trans_next_ail(
402 xfs_log_item_t *nlip; 481 xfs_log_item_t *nlip;
403 482
404 ASSERT(mp && lip && gen); 483 ASSERT(mp && lip && gen);
405 if (mp->m_ail_gen == *gen) { 484 if (mp->m_ail.xa_gen == *gen) {
406 nlip = xfs_ail_next(&(mp->m_ail), lip); 485 nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip);
407 } else { 486 } else {
408 nlip = xfs_ail_min(&(mp->m_ail)); 487 nlip = xfs_ail_min(&(mp->m_ail).xa_ail);
409 *gen = (int)mp->m_ail_gen; 488 *gen = (int)mp->m_ail.xa_gen;
410 if (restarts != NULL) { 489 if (restarts != NULL) {
411 XFS_STATS_INC(xs_push_ail_restarts); 490 XFS_STATS_INC(xs_push_ail_restarts);
412 (*restarts)++; 491 (*restarts)++;
@@ -431,12 +510,20 @@ xfs_trans_next_ail(
431/* 510/*
432 * Initialize the doubly linked list to point only to itself. 511 * Initialize the doubly linked list to point only to itself.
433 */ 512 */
434void 513int
435xfs_trans_ail_init( 514xfs_trans_ail_init(
436 xfs_mount_t *mp) 515 xfs_mount_t *mp)
437{ 516{
438 mp->m_ail.ail_forw = (xfs_log_item_t*)&(mp->m_ail); 517 mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail;
439 mp->m_ail.ail_back = (xfs_log_item_t*)&(mp->m_ail); 518 mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail;
519 return xfsaild_start(mp);
520}
521
522void
523xfs_trans_ail_destroy(
524 xfs_mount_t *mp)
525{
526 xfsaild_stop(mp);
440} 527}
441 528
442/* 529/*
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index c40f0512097a..3c748c456ed4 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -57,4 +57,12 @@ struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
57 struct xfs_log_item *, int *, int *); 57 struct xfs_log_item *, int *, int *);
58 58
59 59
60/*
61 * AIL push thread support
62 */
63long xfsaild_push(struct xfs_mount *, xfs_lsn_t *);
64void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t);
65int xfsaild_start(struct xfs_mount *);
66void xfsaild_stop(struct xfs_mount *);
67
60#endif /* __XFS_TRANS_PRIV_H__ */ 68#endif /* __XFS_TRANS_PRIV_H__ */