aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_trans_ail.c
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2008-02-04 20:13:32 -0500
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-02-07 02:22:51 -0500
commit249a8c1124653fa90f3a3afff869095a31bc229f (patch)
treee0681990b5b61155a64a9fd3c0cf73d4d6bb4ce5 /fs/xfs/xfs_trans_ail.c
parent4576758db5817a91b8974c696247d459dc653db2 (diff)
[XFS] Move AIL pushing into it's own thread
When many hundreds to thousands of threads all try to do simultaneous transactions and the log is in a tail-pushing situation (i.e. full), we can get multiple threads walking the AIL list and contending on the AIL lock. The AIL push is, in effect, a simple I/O dispatch algorithm complicated by the ordering constraints placed on it by the transaction subsystem. It really does not need multiple threads to push on it - even when only a single CPU is pushing the AIL, it can push the I/O out far faster that pretty much any disk subsystem can handle. So, to avoid contention problems stemming from multiple list walkers, move the list walk off into another thread and simply provide a "target" to push to. When a thread requires a push, it sets the target and wakes the push thread, then goes to sleep waiting for the required amount of space to become available in the log. This mechanism should also be a lot fairer under heavy load as the waiters will queue in arrival order, rather than queuing in "who completed a push first" order. Also, by moving the pushing to a separate thread we can do more effectively overload detection and prevention as we can keep context from loop iteration to loop iteration. That is, we can push only part of the list each loop and not have to loop back to the start of the list every time we run. This should also help by reducing the number of items we try to lock and/or push items that we cannot move. Note that this patch is not intended to solve the inefficiencies in the AIL structure and the associated issues with extremely large list contents. That needs to be addresses separately; parallel access would cause problems to any new structure as well, so I'm only aiming to isolate the structure from unbounded parallelism here. SGI-PV: 972759 SGI-Modid: xfs-linux-melb:xfs-kern:30371a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_trans_ail.c')
-rw-r--r--fs/xfs/xfs_trans_ail.c269
1 files changed, 178 insertions, 91 deletions
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2d3c297259c2..7ed7425478de 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -57,7 +57,7 @@ xfs_trans_tail_ail(
57 xfs_log_item_t *lip; 57 xfs_log_item_t *lip;
58 58
59 spin_lock(&mp->m_ail_lock); 59 spin_lock(&mp->m_ail_lock);
60 lip = xfs_ail_min(&(mp->m_ail)); 60 lip = xfs_ail_min(&(mp->m_ail.xa_ail));
61 if (lip == NULL) { 61 if (lip == NULL) {
62 lsn = (xfs_lsn_t)0; 62 lsn = (xfs_lsn_t)0;
63 } else { 63 } else {
@@ -71,119 +71,185 @@ xfs_trans_tail_ail(
71/* 71/*
72 * xfs_trans_push_ail 72 * xfs_trans_push_ail
73 * 73 *
74 * This routine is called to move the tail of the AIL 74 * This routine is called to move the tail of the AIL forward. It does this by
75 * forward. It does this by trying to flush items in the AIL 75 * trying to flush items in the AIL whose lsns are below the given
76 * whose lsns are below the given threshold_lsn. 76 * threshold_lsn.
77 * 77 *
78 * The routine returns the lsn of the tail of the log. 78 * the push is run asynchronously in a separate thread, so we return the tail
79 * of the log right now instead of the tail after the push. This means we will
80 * either continue right away, or we will sleep waiting on the async thread to
81 * do it's work.
82 *
83 * We do this unlocked - we only need to know whether there is anything in the
84 * AIL at the time we are called. We don't need to access the contents of
85 * any of the objects, so the lock is not needed.
79 */ 86 */
80xfs_lsn_t 87void
81xfs_trans_push_ail( 88xfs_trans_push_ail(
82 xfs_mount_t *mp, 89 xfs_mount_t *mp,
83 xfs_lsn_t threshold_lsn) 90 xfs_lsn_t threshold_lsn)
84{ 91{
85 xfs_lsn_t lsn;
86 xfs_log_item_t *lip; 92 xfs_log_item_t *lip;
87 int gen;
88 int restarts;
89 int lock_result;
90 int flush_log;
91 93
92#define XFS_TRANS_PUSH_AIL_RESTARTS 1000 94 lip = xfs_ail_min(&mp->m_ail.xa_ail);
95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
97 xfsaild_wakeup(mp, threshold_lsn);
98 }
99}
100
101/*
102 * Return the item in the AIL with the current lsn.
103 * Return the current tree generation number for use
104 * in calls to xfs_trans_next_ail().
105 */
106STATIC xfs_log_item_t *
107xfs_trans_first_push_ail(
108 xfs_mount_t *mp,
109 int *gen,
110 xfs_lsn_t lsn)
111{
112 xfs_log_item_t *lip;
113
114 lip = xfs_ail_min(&(mp->m_ail.xa_ail));
115 *gen = (int)mp->m_ail.xa_gen;
116 if (lsn == 0)
117 return lip;
118
119 while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0))
120 lip = lip->li_ail.ail_forw;
121
122 return lip;
123}
124
125/*
126 * Function that does the work of pushing on the AIL
127 */
128long
129xfsaild_push(
130 xfs_mount_t *mp,
131 xfs_lsn_t *last_lsn)
132{
133 long tout = 1000; /* milliseconds */
134 xfs_lsn_t last_pushed_lsn = *last_lsn;
135 xfs_lsn_t target = mp->m_ail.xa_target;
136 xfs_lsn_t lsn;
137 xfs_log_item_t *lip;
138 int gen;
139 int restarts;
140 int flush_log, count, stuck;
141
142#define XFS_TRANS_PUSH_AIL_RESTARTS 10
93 143
94 spin_lock(&mp->m_ail_lock); 144 spin_lock(&mp->m_ail_lock);
95 lip = xfs_trans_first_ail(mp, &gen); 145 lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
96 if (lip == NULL || XFS_FORCED_SHUTDOWN(mp)) { 146 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
97 /* 147 /*
98 * Just return if the AIL is empty. 148 * AIL is empty or our push has reached the end.
99 */ 149 */
100 spin_unlock(&mp->m_ail_lock); 150 spin_unlock(&mp->m_ail_lock);
101 return (xfs_lsn_t)0; 151 last_pushed_lsn = 0;
152 goto out;
102 } 153 }
103 154
104 XFS_STATS_INC(xs_push_ail); 155 XFS_STATS_INC(xs_push_ail);
105 156
106 /* 157 /*
107 * While the item we are looking at is below the given threshold 158 * While the item we are looking at is below the given threshold
108 * try to flush it out. Make sure to limit the number of times 159 * try to flush it out. We'd like not to stop until we've at least
109 * we allow xfs_trans_next_ail() to restart scanning from the
110 * beginning of the list. We'd like not to stop until we've at least
111 * tried to push on everything in the AIL with an LSN less than 160 * tried to push on everything in the AIL with an LSN less than
112 * the given threshold. However, we may give up before that if 161 * the given threshold.
113 * we realize that we've been holding the AIL lock for 'too long', 162 *
114 * blocking interrupts. Currently, too long is < 500us roughly. 163 * However, we will stop after a certain number of pushes and wait
164 * for a reduced timeout to fire before pushing further. This
165 * prevents use from spinning when we can't do anything or there is
166 * lots of contention on the AIL lists.
115 */ 167 */
116 flush_log = 0; 168 tout = 10;
117 restarts = 0; 169 lsn = lip->li_lsn;
118 while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) && 170 flush_log = stuck = count = restarts = 0;
119 (XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) { 171 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
172 int lock_result;
120 /* 173 /*
121 * If we can lock the item without sleeping, unlock 174 * If we can lock the item without sleeping, unlock the AIL
122 * the AIL lock and flush the item. Then re-grab the 175 * lock and flush the item. Then re-grab the AIL lock so we
123 * AIL lock so we can look for the next item on the 176 * can look for the next item on the AIL. List changes are
124 * AIL. Since we unlock the AIL while we flush the 177 * handled by the AIL lookup functions internally
125 * item, the next routine may start over again at the
126 * the beginning of the list if anything has changed.
127 * That is what the generation count is for.
128 * 178 *
129 * If we can't lock the item, either its holder will flush 179 * If we can't lock the item, either its holder will flush it
130 * it or it is already being flushed or it is being relogged. 180 * or it is already being flushed or it is being relogged. In
131 * In any of these case it is being taken care of and we 181 * any of these case it is being taken care of and we can just
132 * can just skip to the next item in the list. 182 * skip to the next item in the list.
133 */ 183 */
134 lock_result = IOP_TRYLOCK(lip); 184 lock_result = IOP_TRYLOCK(lip);
185 spin_unlock(&mp->m_ail_lock);
135 switch (lock_result) { 186 switch (lock_result) {
136 case XFS_ITEM_SUCCESS: 187 case XFS_ITEM_SUCCESS:
137 spin_unlock(&mp->m_ail_lock);
138 XFS_STATS_INC(xs_push_ail_success); 188 XFS_STATS_INC(xs_push_ail_success);
139 IOP_PUSH(lip); 189 IOP_PUSH(lip);
140 spin_lock(&mp->m_ail_lock); 190 last_pushed_lsn = lsn;
141 break; 191 break;
142 192
143 case XFS_ITEM_PUSHBUF: 193 case XFS_ITEM_PUSHBUF:
144 spin_unlock(&mp->m_ail_lock);
145 XFS_STATS_INC(xs_push_ail_pushbuf); 194 XFS_STATS_INC(xs_push_ail_pushbuf);
146#ifdef XFSRACEDEBUG
147 delay_for_intr();
148 delay(300);
149#endif
150 ASSERT(lip->li_ops->iop_pushbuf);
151 ASSERT(lip);
152 IOP_PUSHBUF(lip); 195 IOP_PUSHBUF(lip);
153 spin_lock(&mp->m_ail_lock); 196 last_pushed_lsn = lsn;
154 break; 197 break;
155 198
156 case XFS_ITEM_PINNED: 199 case XFS_ITEM_PINNED:
157 XFS_STATS_INC(xs_push_ail_pinned); 200 XFS_STATS_INC(xs_push_ail_pinned);
201 stuck++;
158 flush_log = 1; 202 flush_log = 1;
159 break; 203 break;
160 204
161 case XFS_ITEM_LOCKED: 205 case XFS_ITEM_LOCKED:
162 XFS_STATS_INC(xs_push_ail_locked); 206 XFS_STATS_INC(xs_push_ail_locked);
207 last_pushed_lsn = lsn;
208 stuck++;
163 break; 209 break;
164 210
165 case XFS_ITEM_FLUSHING: 211 case XFS_ITEM_FLUSHING:
166 XFS_STATS_INC(xs_push_ail_flushing); 212 XFS_STATS_INC(xs_push_ail_flushing);
213 last_pushed_lsn = lsn;
214 stuck++;
167 break; 215 break;
168 216
169 default: 217 default:
170 ASSERT(0); 218 ASSERT(0);
171 break; 219 break;
172 } 220 }
173 221
174 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); 222 spin_lock(&mp->m_ail_lock);
175 if (lip == NULL) { 223 /* should we bother continuing? */
224 if (XFS_FORCED_SHUTDOWN(mp))
225 break;
226 ASSERT(mp->m_log);
227
228 count++;
229
230 /*
231 * Are there too many items we can't do anything with?
232 * If we we are skipping too many items because we can't flush
233 * them or they are already being flushed, we back off and
234 * given them time to complete whatever operation is being
235 * done. i.e. remove pressure from the AIL while we can't make
236 * progress so traversals don't slow down further inserts and
237 * removals to/from the AIL.
238 *
239 * The value of 100 is an arbitrary magic number based on
240 * observation.
241 */
242 if (stuck > 100)
176 break; 243 break;
177 }
178 if (XFS_FORCED_SHUTDOWN(mp)) {
179 /*
180 * Just return if we shut down during the last try.
181 */
182 spin_unlock(&mp->m_ail_lock);
183 return (xfs_lsn_t)0;
184 }
185 244
245 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
246 if (lip == NULL)
247 break;
248 if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
249 break;
250 lsn = lip->li_lsn;
186 } 251 }
252 spin_unlock(&mp->m_ail_lock);
187 253
188 if (flush_log) { 254 if (flush_log) {
189 /* 255 /*
@@ -191,22 +257,35 @@ xfs_trans_push_ail(
191 * push out the log so it will become unpinned and 257 * push out the log so it will become unpinned and
192 * move forward in the AIL. 258 * move forward in the AIL.
193 */ 259 */
194 spin_unlock(&mp->m_ail_lock);
195 XFS_STATS_INC(xs_push_ail_flush); 260 XFS_STATS_INC(xs_push_ail_flush);
196 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 261 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
197 spin_lock(&mp->m_ail_lock);
198 } 262 }
199 263
200 lip = xfs_ail_min(&(mp->m_ail)); 264 /*
201 if (lip == NULL) { 265 * We reached the target so wait a bit longer for I/O to complete and
202 lsn = (xfs_lsn_t)0; 266 * remove pushed items from the AIL before we start the next scan from
203 } else { 267 * the start of the AIL.
204 lsn = lip->li_lsn; 268 */
269 if ((XFS_LSN_CMP(lsn, target) >= 0)) {
270 tout += 20;
271 last_pushed_lsn = 0;
272 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
273 (count && ((stuck * 100) / count > 90))) {
274 /*
275 * Either there is a lot of contention on the AIL or we
276 * are stuck due to operations in progress. "Stuck" in this
277 * case is defined as >90% of the items we tried to push
278 * were stuck.
279 *
280 * Backoff a bit more to allow some I/O to complete before
281 * continuing from where we were.
282 */
283 tout += 10;
205 } 284 }
206 285out:
207 spin_unlock(&mp->m_ail_lock); 286 *last_lsn = last_pushed_lsn;
208 return lsn; 287 return tout;
209} /* xfs_trans_push_ail */ 288} /* xfsaild_push */
210 289
211 290
212/* 291/*
@@ -247,7 +326,7 @@ xfs_trans_unlocked_item(
247 * the call to xfs_log_move_tail() doesn't do anything if there's 326 * the call to xfs_log_move_tail() doesn't do anything if there's
248 * not enough free space to wake people up so we're safe calling it. 327 * not enough free space to wake people up so we're safe calling it.
249 */ 328 */
250 min_lip = xfs_ail_min(&mp->m_ail); 329 min_lip = xfs_ail_min(&mp->m_ail.xa_ail);
251 330
252 if (min_lip == lip) 331 if (min_lip == lip)
253 xfs_log_move_tail(mp, 1); 332 xfs_log_move_tail(mp, 1);
@@ -279,7 +358,7 @@ xfs_trans_update_ail(
279 xfs_log_item_t *dlip=NULL; 358 xfs_log_item_t *dlip=NULL;
280 xfs_log_item_t *mlip; /* ptr to minimum lip */ 359 xfs_log_item_t *mlip; /* ptr to minimum lip */
281 360
282 ailp = &(mp->m_ail); 361 ailp = &(mp->m_ail.xa_ail);
283 mlip = xfs_ail_min(ailp); 362 mlip = xfs_ail_min(ailp);
284 363
285 if (lip->li_flags & XFS_LI_IN_AIL) { 364 if (lip->li_flags & XFS_LI_IN_AIL) {
@@ -292,10 +371,10 @@ xfs_trans_update_ail(
292 lip->li_lsn = lsn; 371 lip->li_lsn = lsn;
293 372
294 xfs_ail_insert(ailp, lip); 373 xfs_ail_insert(ailp, lip);
295 mp->m_ail_gen++; 374 mp->m_ail.xa_gen++;
296 375
297 if (mlip == dlip) { 376 if (mlip == dlip) {
298 mlip = xfs_ail_min(&(mp->m_ail)); 377 mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
299 spin_unlock(&mp->m_ail_lock); 378 spin_unlock(&mp->m_ail_lock);
300 xfs_log_move_tail(mp, mlip->li_lsn); 379 xfs_log_move_tail(mp, mlip->li_lsn);
301 } else { 380 } else {
@@ -330,7 +409,7 @@ xfs_trans_delete_ail(
330 xfs_log_item_t *mlip; 409 xfs_log_item_t *mlip;
331 410
332 if (lip->li_flags & XFS_LI_IN_AIL) { 411 if (lip->li_flags & XFS_LI_IN_AIL) {
333 ailp = &(mp->m_ail); 412 ailp = &(mp->m_ail.xa_ail);
334 mlip = xfs_ail_min(ailp); 413 mlip = xfs_ail_min(ailp);
335 dlip = xfs_ail_delete(ailp, lip); 414 dlip = xfs_ail_delete(ailp, lip);
336 ASSERT(dlip == lip); 415 ASSERT(dlip == lip);
@@ -338,10 +417,10 @@ xfs_trans_delete_ail(
338 417
339 lip->li_flags &= ~XFS_LI_IN_AIL; 418 lip->li_flags &= ~XFS_LI_IN_AIL;
340 lip->li_lsn = 0; 419 lip->li_lsn = 0;
341 mp->m_ail_gen++; 420 mp->m_ail.xa_gen++;
342 421
343 if (mlip == dlip) { 422 if (mlip == dlip) {
344 mlip = xfs_ail_min(&(mp->m_ail)); 423 mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
345 spin_unlock(&mp->m_ail_lock); 424 spin_unlock(&mp->m_ail_lock);
346 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); 425 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
347 } else { 426 } else {
@@ -379,10 +458,10 @@ xfs_trans_first_ail(
379{ 458{
380 xfs_log_item_t *lip; 459 xfs_log_item_t *lip;
381 460
382 lip = xfs_ail_min(&(mp->m_ail)); 461 lip = xfs_ail_min(&(mp->m_ail.xa_ail));
383 *gen = (int)mp->m_ail_gen; 462 *gen = (int)mp->m_ail.xa_gen;
384 463
385 return (lip); 464 return lip;
386} 465}
387 466
388/* 467/*
@@ -402,11 +481,11 @@ xfs_trans_next_ail(
402 xfs_log_item_t *nlip; 481 xfs_log_item_t *nlip;
403 482
404 ASSERT(mp && lip && gen); 483 ASSERT(mp && lip && gen);
405 if (mp->m_ail_gen == *gen) { 484 if (mp->m_ail.xa_gen == *gen) {
406 nlip = xfs_ail_next(&(mp->m_ail), lip); 485 nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip);
407 } else { 486 } else {
408 nlip = xfs_ail_min(&(mp->m_ail)); 487 nlip = xfs_ail_min(&(mp->m_ail).xa_ail);
409 *gen = (int)mp->m_ail_gen; 488 *gen = (int)mp->m_ail.xa_gen;
410 if (restarts != NULL) { 489 if (restarts != NULL) {
411 XFS_STATS_INC(xs_push_ail_restarts); 490 XFS_STATS_INC(xs_push_ail_restarts);
412 (*restarts)++; 491 (*restarts)++;
@@ -431,12 +510,20 @@ xfs_trans_next_ail(
431/* 510/*
432 * Initialize the doubly linked list to point only to itself. 511 * Initialize the doubly linked list to point only to itself.
433 */ 512 */
434void 513int
435xfs_trans_ail_init( 514xfs_trans_ail_init(
436 xfs_mount_t *mp) 515 xfs_mount_t *mp)
437{ 516{
438 mp->m_ail.ail_forw = (xfs_log_item_t*)&(mp->m_ail); 517 mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail;
439 mp->m_ail.ail_back = (xfs_log_item_t*)&(mp->m_ail); 518 mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail;
519 return xfsaild_start(mp);
520}
521
522void
523xfs_trans_ail_destroy(
524 xfs_mount_t *mp)
525{
526 xfsaild_stop(mp);
440} 527}
441 528
442/* 529/*