aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_cil.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-05-21 00:37:18 -0400
committerAlex Elder <aelder@sgi.com>2010-05-24 11:38:03 -0400
commit71e330b593905e40d6c5afa824d38ee02d70ce5f (patch)
tree4c9fa6c4766280752fc40f3057fd6cf64396c16c /fs/xfs/xfs_log_cil.c
parenta9a745daadab26f13884ff26a50fa38247c11ce9 (diff)
xfs: Introduce delayed logging core code
The delayed logging code only changes in-memory structures and as such can be enabled and disabled with a mount option. Add the mount option and emit a warning that this is an experimental feature that should not be used in production yet. We also need infrastructure to track committed items that have not yet been written to the log. This is what the Committed Item List (CIL) is for. The log item also needs to be extended to track the current log vector, the associated memory buffer and its location in the Committed Item List. Extend the log item and log vector structures to enable this tracking. To maintain the current log format for transactions with delayed logging, we need to introduce a checkpoint transaction and a context for tracking each checkpoint from initiation to transaction completion. This includes adding a log ticket for tracking the log space required/used by the context checkpoint. To track all the changes we need an io vector array per log item, rather than a single array for the entire transaction. Using the new log vector structure for this requires two passes - the first to allocate the log vector structures and chain them together, and the second to fill them out. This log vector chain can then be passed to the CIL for formatting, pinning and insertion into the CIL. Formatting of the log vector chain is relatively simple - it's just a loop over the iovecs on each log vector, but it is made slightly more complex because we re-write the iovec after the copy to point back at the memory buffer we just copied into. This code also needs to pin log items. If the log item is not already tracked in this checkpoint context, then it needs to be pinned. Otherwise it is already pinned and we don't need to pin it again. The only other complexity is calculating the amount of new log space the formatting has consumed. 
This needs to be accounted to the transaction in progress, and the accounting is made more complex because we also need to steal space from it for log metadata in the checkpoint transaction. Calculate all this at insert time and update all the tickets, counters, etc correctly. Once we've formatted all the log items in the transaction, attach the busy extents to the checkpoint context so the busy extents live until checkpoint completion and can be processed at that point in time. Transactions can then be freed at this point in time. Now we need to issue checkpoints - we are tracking the amount of log space used by the items in the CIL, so we can trigger background checkpoints when the space usage gets to a certain threshold. Otherwise, checkpoints need to be triggered when a log synchronisation point is reached - a log force event. Because the log write code already handles chained log vectors, writing the transaction is trivial, too. Construct a transaction header, add it to the head of the chain and write it into the log, then issue a commit record write. Then we can release the checkpoint log ticket and attach the context to the log buffer so it can be called during I/O completion to complete the checkpoint. We also need to allow for synchronising multiple in-flight checkpoints. This is needed for two things - the first is to ensure that checkpoint commit records appear in the log in the correct sequence order (so they are replayed in the correct order). The second is so that xfs_log_force_lsn() operates correctly and only flushes and/or waits for the specific sequence it was provided with. To do this we need a wait variable and a list tracking the checkpoint commits in progress. We can walk this list and wait for the checkpoints to change state or complete easily, and this provides the necessary synchronisation for correct operation in both cases. 
Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
-rw-r--r--fs/xfs/xfs_log_cil.c659
1 files changed, 659 insertions, 0 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
new file mode 100644
index 000000000000..53abd6b0a333
--- /dev/null
+++ b/fs/xfs/xfs_log_cil.c
@@ -0,0 +1,659 @@
1/*
2 * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_trans_priv.h"
26#include "xfs_log_priv.h"
27#include "xfs_sb.h"
28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_dmapi.h"
31#include "xfs_mount.h"
32#include "xfs_error.h"
33#include "xfs_alloc.h"
34
35/*
36 * Perform initial CIL structure initialisation. If the CIL is not
37 * enabled in this filesystem, ensure the log->l_cilp is null so
38 * we can check this conditional to determine if we are doing delayed
39 * logging or not.
40 */
41int
42xlog_cil_init(
43 struct log *log)
44{
45 struct xfs_cil *cil;
46 struct xfs_cil_ctx *ctx;
47
48 log->l_cilp = NULL;
49 if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
50 return 0;
51
52 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
53 if (!cil)
54 return ENOMEM;
55
56 ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
57 if (!ctx) {
58 kmem_free(cil);
59 return ENOMEM;
60 }
61
62 INIT_LIST_HEAD(&cil->xc_cil);
63 INIT_LIST_HEAD(&cil->xc_committing);
64 spin_lock_init(&cil->xc_cil_lock);
65 init_rwsem(&cil->xc_ctx_lock);
66 sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
67
68 INIT_LIST_HEAD(&ctx->committing);
69 INIT_LIST_HEAD(&ctx->busy_extents);
70 ctx->sequence = 1;
71 ctx->cil = cil;
72 cil->xc_ctx = ctx;
73
74 cil->xc_log = log;
75 log->l_cilp = cil;
76 return 0;
77}
78
79void
80xlog_cil_destroy(
81 struct log *log)
82{
83 if (!log->l_cilp)
84 return;
85
86 if (log->l_cilp->xc_ctx) {
87 if (log->l_cilp->xc_ctx->ticket)
88 xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
89 kmem_free(log->l_cilp->xc_ctx);
90 }
91
92 ASSERT(list_empty(&log->l_cilp->xc_cil));
93 kmem_free(log->l_cilp);
94}
95
96/*
97 * Allocate a new ticket. Failing to get a new ticket makes it really hard to
98 * recover, so we don't allow failure here. Also, we allocate in a context that
99 * we don't want to be issuing transactions from, so we need to tell the
100 * allocation code this as well.
101 *
102 * We don't reserve any space for the ticket - we are going to steal whatever
103 * space we require from transactions as they commit. To ensure we reserve all
104 * the space required, we need to set the current reservation of the ticket to
105 * zero so that we know to steal the initial transaction overhead from the
106 * first transaction commit.
107 */
108static struct xlog_ticket *
109xlog_cil_ticket_alloc(
110 struct log *log)
111{
112 struct xlog_ticket *tic;
113
114 tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
115 KM_SLEEP|KM_NOFS);
116 tic->t_trans_type = XFS_TRANS_CHECKPOINT;
117
118 /*
119 * set the current reservation to zero so we know to steal the basic
120 * transaction overhead reservation from the first transaction commit.
121 */
122 tic->t_curr_res = 0;
123 return tic;
124}
125
126/*
127 * After the first stage of log recovery is done, we know where the head and
128 * tail of the log are. We need this log initialisation done before we can
129 * initialise the first CIL checkpoint context.
130 *
131 * Here we allocate a log ticket to track space usage during a CIL push. This
132 * ticket is passed to xlog_write() directly so that we don't slowly leak log
133 * space by failing to account for space used by log headers and additional
134 * region headers for split regions.
135 */
136void
137xlog_cil_init_post_recovery(
138 struct log *log)
139{
140 if (!log->l_cilp)
141 return;
142
143 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
144 log->l_cilp->xc_ctx->sequence = 1;
145 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
146 log->l_curr_block);
147}
148
149/*
150 * Insert the log item into the CIL and calculate the difference in space
151 * consumed by the item. Add the space to the checkpoint ticket and calculate
152 * if the change requires additional log metadata. If it does, take that space
153 * as well. Remove the amount of space we addded to the checkpoint ticket from
154 * the current transaction ticket so that the accounting works out correctly.
155 *
156 * If this is the first time the item is being placed into the CIL in this
157 * context, pin it so it can't be written to disk until the CIL is flushed to
158 * the iclog and the iclog written to disk.
159 */
160static void
161xlog_cil_insert(
162 struct log *log,
163 struct xlog_ticket *ticket,
164 struct xfs_log_item *item,
165 struct xfs_log_vec *lv)
166{
167 struct xfs_cil *cil = log->l_cilp;
168 struct xfs_log_vec *old = lv->lv_item->li_lv;
169 struct xfs_cil_ctx *ctx = cil->xc_ctx;
170 int len;
171 int diff_iovecs;
172 int iclog_space;
173
174 if (old) {
175 /* existing lv on log item, space used is a delta */
176 ASSERT(!list_empty(&item->li_cil));
177 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
178
179 len = lv->lv_buf_len - old->lv_buf_len;
180 diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
181 kmem_free(old->lv_buf);
182 kmem_free(old);
183 } else {
184 /* new lv, must pin the log item */
185 ASSERT(!lv->lv_item->li_lv);
186 ASSERT(list_empty(&item->li_cil));
187
188 len = lv->lv_buf_len;
189 diff_iovecs = lv->lv_niovecs;
190 IOP_PIN(lv->lv_item);
191
192 }
193 len += diff_iovecs * sizeof(xlog_op_header_t);
194
195 /* attach new log vector to log item */
196 lv->lv_item->li_lv = lv;
197
198 spin_lock(&cil->xc_cil_lock);
199 list_move_tail(&item->li_cil, &cil->xc_cil);
200 ctx->nvecs += diff_iovecs;
201
202 /*
203 * Now transfer enough transaction reservation to the context ticket
204 * for the checkpoint. The context ticket is special - the unit
205 * reservation has to grow as well as the current reservation as we
206 * steal from tickets so we can correctly determine the space used
207 * during the transaction commit.
208 */
209 if (ctx->ticket->t_curr_res == 0) {
210 /* first commit in checkpoint, steal the header reservation */
211 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
212 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
213 ticket->t_curr_res -= ctx->ticket->t_unit_res;
214 }
215
216 /* do we need space for more log record headers? */
217 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
218 if (len > 0 && (ctx->space_used / iclog_space !=
219 (ctx->space_used + len) / iclog_space)) {
220 int hdrs;
221
222 hdrs = (len + iclog_space - 1) / iclog_space;
223 /* need to take into account split region headers, too */
224 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
225 ctx->ticket->t_unit_res += hdrs;
226 ctx->ticket->t_curr_res += hdrs;
227 ticket->t_curr_res -= hdrs;
228 ASSERT(ticket->t_curr_res >= len);
229 }
230 ticket->t_curr_res -= len;
231 ctx->space_used += len;
232
233 spin_unlock(&cil->xc_cil_lock);
234}
235
236/*
237 * Format log item into a flat buffers
238 *
239 * For delayed logging, we need to hold a formatted buffer containing all the
240 * changes on the log item. This enables us to relog the item in memory and
241 * write it out asynchronously without needing to relock the object that was
242 * modified at the time it gets written into the iclog.
243 *
244 * This function builds a vector for the changes in each log item in the
245 * transaction. It then works out the length of the buffer needed for each log
246 * item, allocates them and formats the vector for the item into the buffer.
247 * The buffer is then attached to the log item are then inserted into the
248 * Committed Item List for tracking until the next checkpoint is written out.
249 *
250 * We don't set up region headers during this process; we simply copy the
251 * regions into the flat buffer. We can do this because we still have to do a
252 * formatting step to write the regions into the iclog buffer. Writing the
253 * ophdrs during the iclog write means that we can support splitting large
254 * regions across iclog boundares without needing a change in the format of the
255 * item/region encapsulation.
256 *
257 * Hence what we need to do now is change the rewrite the vector array to point
258 * to the copied region inside the buffer we just allocated. This allows us to
259 * format the regions into the iclog as though they are being formatted
260 * directly out of the objects themselves.
261 */
262static void
263xlog_cil_format_items(
264 struct log *log,
265 struct xfs_log_vec *log_vector,
266 struct xlog_ticket *ticket,
267 xfs_lsn_t *start_lsn)
268{
269 struct xfs_log_vec *lv;
270
271 if (start_lsn)
272 *start_lsn = log->l_cilp->xc_ctx->sequence;
273
274 ASSERT(log_vector);
275 for (lv = log_vector; lv; lv = lv->lv_next) {
276 void *ptr;
277 int index;
278 int len = 0;
279
280 /* build the vector array and calculate it's length */
281 IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
282 for (index = 0; index < lv->lv_niovecs; index++)
283 len += lv->lv_iovecp[index].i_len;
284
285 lv->lv_buf_len = len;
286 lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
287 ptr = lv->lv_buf;
288
289 for (index = 0; index < lv->lv_niovecs; index++) {
290 struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
291
292 memcpy(ptr, vec->i_addr, vec->i_len);
293 vec->i_addr = ptr;
294 ptr += vec->i_len;
295 }
296 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
297
298 xlog_cil_insert(log, ticket, lv->lv_item, lv);
299 }
300}
301
302static void
303xlog_cil_free_logvec(
304 struct xfs_log_vec *log_vector)
305{
306 struct xfs_log_vec *lv;
307
308 for (lv = log_vector; lv; ) {
309 struct xfs_log_vec *next = lv->lv_next;
310 kmem_free(lv->lv_buf);
311 kmem_free(lv);
312 lv = next;
313 }
314}
315
316/*
317 * Commit a transaction with the given vector to the Committed Item List.
318 *
319 * To do this, we need to format the item, pin it in memory if required and
320 * account for the space used by the transaction. Once we have done that we
321 * need to release the unused reservation for the transaction, attach the
322 * transaction to the checkpoint context so we carry the busy extents through
323 * to checkpoint completion, and then unlock all the items in the transaction.
324 *
325 * For more specific information about the order of operations in
326 * xfs_log_commit_cil() please refer to the comments in
327 * xfs_trans_commit_iclog().
328 */
329int
330xfs_log_commit_cil(
331 struct xfs_mount *mp,
332 struct xfs_trans *tp,
333 struct xfs_log_vec *log_vector,
334 xfs_lsn_t *commit_lsn,
335 int flags)
336{
337 struct log *log = mp->m_log;
338 int log_flags = 0;
339
340 if (flags & XFS_TRANS_RELEASE_LOG_RES)
341 log_flags = XFS_LOG_REL_PERM_RESERV;
342
343 if (XLOG_FORCED_SHUTDOWN(log)) {
344 xlog_cil_free_logvec(log_vector);
345 return XFS_ERROR(EIO);
346 }
347
348 /* lock out background commit */
349 down_read(&log->l_cilp->xc_ctx_lock);
350 xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
351
352 /* check we didn't blow the reservation */
353 if (tp->t_ticket->t_curr_res < 0)
354 xlog_print_tic_res(log->l_mp, tp->t_ticket);
355
356 /* attach the transaction to the CIL if it has any busy extents */
357 if (!list_empty(&tp->t_busy)) {
358 spin_lock(&log->l_cilp->xc_cil_lock);
359 list_splice_init(&tp->t_busy,
360 &log->l_cilp->xc_ctx->busy_extents);
361 spin_unlock(&log->l_cilp->xc_cil_lock);
362 }
363
364 tp->t_commit_lsn = *commit_lsn;
365 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
366 xfs_trans_unreserve_and_mod_sb(tp);
367
368 /* background commit is allowed again */
369 up_read(&log->l_cilp->xc_ctx_lock);
370 return 0;
371}
372
373/*
374 * Mark all items committed and clear busy extents. We free the log vector
375 * chains in a separate pass so that we unpin the log items as quickly as
376 * possible.
377 */
378static void
379xlog_cil_committed(
380 void *args,
381 int abort)
382{
383 struct xfs_cil_ctx *ctx = args;
384 struct xfs_log_vec *lv;
385 int abortflag = abort ? XFS_LI_ABORTED : 0;
386 struct xfs_busy_extent *busyp, *n;
387
388 /* unpin all the log items */
389 for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
390 xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
391 abortflag);
392 }
393
394 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
395 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
396
397 spin_lock(&ctx->cil->xc_cil_lock);
398 list_del(&ctx->committing);
399 spin_unlock(&ctx->cil->xc_cil_lock);
400
401 xlog_cil_free_logvec(ctx->lv_chain);
402 kmem_free(ctx);
403}
404
405/*
406 * Push the Committed Item List to the log. If the push_now flag is not set,
407 * then it is a background flush and so we can chose to ignore it.
408 */
409int
410xlog_cil_push(
411 struct log *log,
412 int push_now)
413{
414 struct xfs_cil *cil = log->l_cilp;
415 struct xfs_log_vec *lv;
416 struct xfs_cil_ctx *ctx;
417 struct xfs_cil_ctx *new_ctx;
418 struct xlog_in_core *commit_iclog;
419 struct xlog_ticket *tic;
420 int num_lv;
421 int num_iovecs;
422 int len;
423 int error = 0;
424 struct xfs_trans_header thdr;
425 struct xfs_log_iovec lhdr;
426 struct xfs_log_vec lvhdr = { NULL };
427 xfs_lsn_t commit_lsn;
428
429 if (!cil)
430 return 0;
431
432 /* XXX: don't sleep for background? */
433 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
434 new_ctx->ticket = xlog_cil_ticket_alloc(log);
435
436 /* lock out transaction commit */
437 down_write(&cil->xc_ctx_lock);
438 ctx = cil->xc_ctx;
439
440 /* check if we've anything to push */
441 if (list_empty(&cil->xc_cil))
442 goto out_skip;
443
444 /*
445 * pull all the log vectors off the items in the CIL, and
446 * remove the items from the CIL. We don't need the CIL lock
447 * here because it's only needed on the transaction commit
448 * side which is currently locked out by the flush lock.
449 */
450 lv = NULL;
451 num_lv = 0;
452 num_iovecs = 0;
453 len = 0;
454 while (!list_empty(&cil->xc_cil)) {
455 struct xfs_log_item *item;
456 int i;
457
458 item = list_first_entry(&cil->xc_cil,
459 struct xfs_log_item, li_cil);
460 list_del_init(&item->li_cil);
461 if (!ctx->lv_chain)
462 ctx->lv_chain = item->li_lv;
463 else
464 lv->lv_next = item->li_lv;
465 lv = item->li_lv;
466 item->li_lv = NULL;
467
468 num_lv++;
469 num_iovecs += lv->lv_niovecs;
470 for (i = 0; i < lv->lv_niovecs; i++)
471 len += lv->lv_iovecp[i].i_len;
472 }
473
474 /*
475 * initialise the new context and attach it to the CIL. Then attach
476 * the current context to the CIL committing lsit so it can be found
477 * during log forces to extract the commit lsn of the sequence that
478 * needs to be forced.
479 */
480 INIT_LIST_HEAD(&new_ctx->committing);
481 INIT_LIST_HEAD(&new_ctx->busy_extents);
482 new_ctx->sequence = ctx->sequence + 1;
483 new_ctx->cil = cil;
484 cil->xc_ctx = new_ctx;
485
486 /*
487 * The switch is now done, so we can drop the context lock and move out
488 * of a shared context. We can't just go straight to the commit record,
489 * though - we need to synchronise with previous and future commits so
490 * that the commit records are correctly ordered in the log to ensure
491 * that we process items during log IO completion in the correct order.
492 *
493 * For example, if we get an EFI in one checkpoint and the EFD in the
494 * next (e.g. due to log forces), we do not want the checkpoint with
495 * the EFD to be committed before the checkpoint with the EFI. Hence
496 * we must strictly order the commit records of the checkpoints so
497 * that: a) the checkpoint callbacks are attached to the iclogs in the
498 * correct order; and b) the checkpoints are replayed in correct order
499 * in log recovery.
500 *
501 * Hence we need to add this context to the committing context list so
502 * that higher sequences will wait for us to write out a commit record
503 * before they do.
504 */
505 spin_lock(&cil->xc_cil_lock);
506 list_add(&ctx->committing, &cil->xc_committing);
507 spin_unlock(&cil->xc_cil_lock);
508 up_write(&cil->xc_ctx_lock);
509
510 /*
511 * Build a checkpoint transaction header and write it to the log to
512 * begin the transaction. We need to account for the space used by the
513 * transaction header here as it is not accounted for in xlog_write().
514 *
515 * The LSN we need to pass to the log items on transaction commit is
516 * the LSN reported by the first log vector write. If we use the commit
517 * record lsn then we can move the tail beyond the grant write head.
518 */
519 tic = ctx->ticket;
520 thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
521 thdr.th_type = XFS_TRANS_CHECKPOINT;
522 thdr.th_tid = tic->t_tid;
523 thdr.th_num_items = num_iovecs;
524 lhdr.i_addr = (xfs_caddr_t)&thdr;
525 lhdr.i_len = sizeof(xfs_trans_header_t);
526 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
527 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
528
529 lvhdr.lv_niovecs = 1;
530 lvhdr.lv_iovecp = &lhdr;
531 lvhdr.lv_next = ctx->lv_chain;
532
533 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
534 if (error)
535 goto out_abort;
536
537 /*
538 * now that we've written the checkpoint into the log, strictly
539 * order the commit records so replay will get them in the right order.
540 */
541restart:
542 spin_lock(&cil->xc_cil_lock);
543 list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
544 /*
545 * Higher sequences will wait for this one so skip them.
546 * Don't wait for own own sequence, either.
547 */
548 if (new_ctx->sequence >= ctx->sequence)
549 continue;
550 if (!new_ctx->commit_lsn) {
551 /*
552 * It is still being pushed! Wait for the push to
553 * complete, then start again from the beginning.
554 */
555 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
556 goto restart;
557 }
558 }
559 spin_unlock(&cil->xc_cil_lock);
560
561 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
562 if (error || commit_lsn == -1)
563 goto out_abort;
564
565 /* attach all the transactions w/ busy extents to iclog */
566 ctx->log_cb.cb_func = xlog_cil_committed;
567 ctx->log_cb.cb_arg = ctx;
568 error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb);
569 if (error)
570 goto out_abort;
571
572 /*
573 * now the checkpoint commit is complete and we've attached the
574 * callbacks to the iclog we can assign the commit LSN to the context
575 * and wake up anyone who is waiting for the commit to complete.
576 */
577 spin_lock(&cil->xc_cil_lock);
578 ctx->commit_lsn = commit_lsn;
579 sv_broadcast(&cil->xc_commit_wait);
580 spin_unlock(&cil->xc_cil_lock);
581
582 /* release the hounds! */
583 return xfs_log_release_iclog(log->l_mp, commit_iclog);
584
585out_skip:
586 up_write(&cil->xc_ctx_lock);
587 xfs_log_ticket_put(new_ctx->ticket);
588 kmem_free(new_ctx);
589 return 0;
590
591out_abort:
592 xlog_cil_committed(ctx, XFS_LI_ABORTED);
593 return XFS_ERROR(EIO);
594}
595
596/*
597 * Conditionally push the CIL based on the sequence passed in.
598 *
599 * We only need to push if we haven't already pushed the sequence
600 * number given. Hence the only time we will trigger a push here is
601 * if the push sequence is the same as the current context.
602 *
603 * We return the current commit lsn to allow the callers to determine if a
604 * iclog flush is necessary following this call.
605 *
606 * XXX: Initially, just push the CIL unconditionally and return whatever
607 * commit lsn is there. It'll be empty, so this is broken for now.
608 */
609xfs_lsn_t
610xlog_cil_push_lsn(
611 struct log *log,
612 xfs_lsn_t push_seq)
613{
614 struct xfs_cil *cil = log->l_cilp;
615 struct xfs_cil_ctx *ctx;
616 xfs_lsn_t commit_lsn = NULLCOMMITLSN;
617
618restart:
619 down_write(&cil->xc_ctx_lock);
620 ASSERT(push_seq <= cil->xc_ctx->sequence);
621
622 /* check to see if we need to force out the current context */
623 if (push_seq == cil->xc_ctx->sequence) {
624 up_write(&cil->xc_ctx_lock);
625 xlog_cil_push(log, 1);
626 goto restart;
627 }
628
629 /*
630 * See if we can find a previous sequence still committing.
631 * We can drop the flush lock as soon as we have the cil lock
632 * because we are now only comparing contexts protected by
633 * the cil lock.
634 *
635 * We need to wait for all previous sequence commits to complete
636 * before allowing the force of push_seq to go ahead. Hence block
637 * on commits for those as well.
638 */
639 spin_lock(&cil->xc_cil_lock);
640 up_write(&cil->xc_ctx_lock);
641 list_for_each_entry(ctx, &cil->xc_committing, committing) {
642 if (ctx->sequence > push_seq)
643 continue;
644 if (!ctx->commit_lsn) {
645 /*
646 * It is still being pushed! Wait for the push to
647 * complete, then start again from the beginning.
648 */
649 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
650 goto restart;
651 }
652 if (ctx->sequence != push_seq)
653 continue;
654 /* found it! */
655 commit_lsn = ctx->commit_lsn;
656 }
657 spin_unlock(&cil->xc_cil_lock);
658 return commit_lsn;
659}