diff options
author | Dave Chinner <dchinner@redhat.com> | 2013-06-27 02:04:51 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2013-06-27 14:32:08 -0400 |
commit | fd63875cc4cd60b9e5c609c24d75eaaad3e6d1c4 (patch) | |
tree | ceef48cc85066703480e23326a46f726d22f893f /fs/xfs | |
parent | 1baaed8fa955ab0d23aab24477dae566ed6a105b (diff) |
xfs: Introduce ordered log vector support
An "ordered log vector" is a log vector that is used for
tracking a log item through the CIL and into the AIL as part of the
log checkpointing. These ordered log vectors are special in that
they are not written to the journal in any way, and are not accounted
to the checkpoint being written.
The reason for this behaviour is to allow operations to attach items
to transactions and have them follow the normal transactional
lifecycle without actually having to write them to the journal. This
allows logging of items that track high level logical changes and
writing them to the log, while the physical items being modified
pass through into the AIL and pin the tail of the log (and therefore
the logical item in the log) until all the modified items are
physically written to disk.
IOWs, it allows us to write metadata without physically logging
every individual change but still maintain the full transactional
integrity guarantees we currently have w.r.t. crash recovery.
This change modifies some of the CIL item insertion loops, as
ordered log vectors introduce some new constraints as they don't
track any data. One advantage of this change is that it combines
two log vector chain walks into a single pass, so there is less
overhead in the transaction commit pass as well. It also kills some
unused code in the log vector walk loop when committing the CIL.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_log.c | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_log.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 75 |
3 files changed, 71 insertions, 28 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b345a7c85153..d852a2b3e1fd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1963,6 +1963,10 @@ xlog_write_calc_vec_length( | |||
1963 | headers++; | 1963 | headers++; |
1964 | 1964 | ||
1965 | for (lv = log_vector; lv; lv = lv->lv_next) { | 1965 | for (lv = log_vector; lv; lv = lv->lv_next) { |
1966 | /* we don't write ordered log vectors */ | ||
1967 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) | ||
1968 | continue; | ||
1969 | |||
1966 | headers += lv->lv_niovecs; | 1970 | headers += lv->lv_niovecs; |
1967 | 1971 | ||
1968 | for (i = 0; i < lv->lv_niovecs; i++) { | 1972 | for (i = 0; i < lv->lv_niovecs; i++) { |
@@ -2216,7 +2220,7 @@ xlog_write( | |||
2216 | index = 0; | 2220 | index = 0; |
2217 | lv = log_vector; | 2221 | lv = log_vector; |
2218 | vecp = lv->lv_iovecp; | 2222 | vecp = lv->lv_iovecp; |
2219 | while (lv && index < lv->lv_niovecs) { | 2223 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
2220 | void *ptr; | 2224 | void *ptr; |
2221 | int log_offset; | 2225 | int log_offset; |
2222 | 2226 | ||
@@ -2236,13 +2240,22 @@ xlog_write( | |||
2236 | * This loop writes out as many regions as can fit in the amount | 2240 | * This loop writes out as many regions as can fit in the amount |
2237 | * of space which was allocated by xlog_state_get_iclog_space(). | 2241 | * of space which was allocated by xlog_state_get_iclog_space(). |
2238 | */ | 2242 | */ |
2239 | while (lv && index < lv->lv_niovecs) { | 2243 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
2240 | struct xfs_log_iovec *reg = &vecp[index]; | 2244 | struct xfs_log_iovec *reg; |
2241 | struct xlog_op_header *ophdr; | 2245 | struct xlog_op_header *ophdr; |
2242 | int start_rec_copy; | 2246 | int start_rec_copy; |
2243 | int copy_len; | 2247 | int copy_len; |
2244 | int copy_off; | 2248 | int copy_off; |
2249 | bool ordered = false; | ||
2250 | |||
2251 | /* ordered log vectors have no regions to write */ | ||
2252 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { | ||
2253 | ASSERT(lv->lv_niovecs == 0); | ||
2254 | ordered = true; | ||
2255 | goto next_lv; | ||
2256 | } | ||
2245 | 2257 | ||
2258 | reg = &vecp[index]; | ||
2246 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); | 2259 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); |
2247 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); | 2260 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); |
2248 | 2261 | ||
@@ -2302,12 +2315,13 @@ xlog_write( | |||
2302 | break; | 2315 | break; |
2303 | 2316 | ||
2304 | if (++index == lv->lv_niovecs) { | 2317 | if (++index == lv->lv_niovecs) { |
2318 | next_lv: | ||
2305 | lv = lv->lv_next; | 2319 | lv = lv->lv_next; |
2306 | index = 0; | 2320 | index = 0; |
2307 | if (lv) | 2321 | if (lv) |
2308 | vecp = lv->lv_iovecp; | 2322 | vecp = lv->lv_iovecp; |
2309 | } | 2323 | } |
2310 | if (record_cnt == 0) { | 2324 | if (record_cnt == 0 && ordered == false) { |
2311 | if (!lv) | 2325 | if (!lv) |
2312 | return 0; | 2326 | return 0; |
2313 | break; | 2327 | break; |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 5caee96059df..b20918c554aa 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -105,6 +105,8 @@ struct xfs_log_vec { | |||
105 | int lv_buf_len; /* size of formatted buffer */ | 105 | int lv_buf_len; /* size of formatted buffer */ |
106 | }; | 106 | }; |
107 | 107 | ||
108 | #define XFS_LOG_VEC_ORDERED (-1) | ||
109 | |||
108 | /* | 110 | /* |
109 | * Structure used to pass callback function and the function's argument | 111 | * Structure used to pass callback function and the function's argument |
110 | * to the log manager. | 112 | * to the log manager. |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d0833b54e55d..02b9cf3f8252 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -127,6 +127,7 @@ xlog_cil_prepare_log_vecs( | |||
127 | int index; | 127 | int index; |
128 | int len = 0; | 128 | int len = 0; |
129 | uint niovecs; | 129 | uint niovecs; |
130 | bool ordered = false; | ||
130 | 131 | ||
131 | /* Skip items which aren't dirty in this transaction. */ | 132 | /* Skip items which aren't dirty in this transaction. */ |
132 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) | 133 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) |
@@ -137,14 +138,30 @@ xlog_cil_prepare_log_vecs( | |||
137 | if (!niovecs) | 138 | if (!niovecs) |
138 | continue; | 139 | continue; |
139 | 140 | ||
141 | /* | ||
142 | * Ordered items need to be tracked but we do not wish to write | ||
143 | * them. We need a logvec to track the object, but we do not | ||
144 | * need an iovec or buffer to be allocated for copying data. | ||
145 | */ | ||
146 | if (niovecs == XFS_LOG_VEC_ORDERED) { | ||
147 | ordered = true; | ||
148 | niovecs = 0; | ||
149 | } | ||
150 | |||
140 | new_lv = kmem_zalloc(sizeof(*new_lv) + | 151 | new_lv = kmem_zalloc(sizeof(*new_lv) + |
141 | niovecs * sizeof(struct xfs_log_iovec), | 152 | niovecs * sizeof(struct xfs_log_iovec), |
142 | KM_SLEEP|KM_NOFS); | 153 | KM_SLEEP|KM_NOFS); |
143 | 154 | ||
155 | new_lv->lv_item = lidp->lid_item; | ||
156 | new_lv->lv_niovecs = niovecs; | ||
157 | if (ordered) { | ||
158 | /* track as an ordered logvec */ | ||
159 | new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED; | ||
160 | goto next; | ||
161 | } | ||
162 | |||
144 | /* The allocated iovec region lies beyond the log vector. */ | 163 | /* The allocated iovec region lies beyond the log vector. */ |
145 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; | 164 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; |
146 | new_lv->lv_niovecs = niovecs; | ||
147 | new_lv->lv_item = lidp->lid_item; | ||
148 | 165 | ||
149 | /* build the vector array and calculate it's length */ | 166 | /* build the vector array and calculate it's length */ |
150 | IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); | 167 | IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); |
@@ -165,6 +182,7 @@ xlog_cil_prepare_log_vecs( | |||
165 | } | 182 | } |
166 | ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); | 183 | ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); |
167 | 184 | ||
185 | next: | ||
168 | if (!ret_lv) | 186 | if (!ret_lv) |
169 | ret_lv = new_lv; | 187 | ret_lv = new_lv; |
170 | else | 188 | else |
@@ -191,8 +209,18 @@ xfs_cil_prepare_item( | |||
191 | 209 | ||
192 | if (old) { | 210 | if (old) { |
193 | /* existing lv on log item, space used is a delta */ | 211 | /* existing lv on log item, space used is a delta */ |
194 | ASSERT(!list_empty(&lv->lv_item->li_cil)); | 212 | ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) || |
195 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | 213 | old->lv_buf_len == XFS_LOG_VEC_ORDERED); |
214 | |||
215 | /* | ||
216 | * If the new item is ordered, keep the old one that is already | ||
217 | * tracking dirty or ordered regions | ||
218 | */ | ||
219 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { | ||
220 | ASSERT(!lv->lv_buf); | ||
221 | kmem_free(lv); | ||
222 | return; | ||
223 | } | ||
196 | 224 | ||
197 | *len += lv->lv_buf_len - old->lv_buf_len; | 225 | *len += lv->lv_buf_len - old->lv_buf_len; |
198 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; | 226 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; |
@@ -201,10 +229,11 @@ xfs_cil_prepare_item( | |||
201 | } else { | 229 | } else { |
202 | /* new lv, must pin the log item */ | 230 | /* new lv, must pin the log item */ |
203 | ASSERT(!lv->lv_item->li_lv); | 231 | ASSERT(!lv->lv_item->li_lv); |
204 | ASSERT(list_empty(&lv->lv_item->li_cil)); | ||
205 | 232 | ||
206 | *len += lv->lv_buf_len; | 233 | if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { |
207 | *diff_iovecs += lv->lv_niovecs; | 234 | *len += lv->lv_buf_len; |
235 | *diff_iovecs += lv->lv_niovecs; | ||
236 | } | ||
208 | IOP_PIN(lv->lv_item); | 237 | IOP_PIN(lv->lv_item); |
209 | 238 | ||
210 | } | 239 | } |
@@ -259,18 +288,24 @@ xlog_cil_insert_items( | |||
259 | * We can do this safely because the context can't checkpoint until we | 288 | * We can do this safely because the context can't checkpoint until we |
260 | * are done so it doesn't matter exactly how we update the CIL. | 289 | * are done so it doesn't matter exactly how we update the CIL. |
261 | */ | 290 | */ |
262 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
263 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); | ||
264 | |||
265 | /* account for space used by new iovec headers */ | ||
266 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
267 | |||
268 | spin_lock(&cil->xc_cil_lock); | 291 | spin_lock(&cil->xc_cil_lock); |
292 | for (lv = log_vector; lv; ) { | ||
293 | struct xfs_log_vec *next = lv->lv_next; | ||
269 | 294 | ||
270 | /* move the items to the tail of the CIL */ | 295 | ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil)); |
271 | for (lv = log_vector; lv; lv = lv->lv_next) | 296 | lv->lv_next = NULL; |
297 | |||
298 | /* | ||
299 | * xfs_cil_prepare_item() may free the lv, so move the item on | ||
300 | * the CIL first. | ||
301 | */ | ||
272 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); | 302 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); |
303 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); | ||
304 | lv = next; | ||
305 | } | ||
273 | 306 | ||
307 | /* account for space used by new iovec headers */ | ||
308 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
274 | ctx->nvecs += diff_iovecs; | 309 | ctx->nvecs += diff_iovecs; |
275 | 310 | ||
276 | /* | 311 | /* |
@@ -381,9 +416,7 @@ xlog_cil_push( | |||
381 | struct xfs_cil_ctx *new_ctx; | 416 | struct xfs_cil_ctx *new_ctx; |
382 | struct xlog_in_core *commit_iclog; | 417 | struct xlog_in_core *commit_iclog; |
383 | struct xlog_ticket *tic; | 418 | struct xlog_ticket *tic; |
384 | int num_lv; | ||
385 | int num_iovecs; | 419 | int num_iovecs; |
386 | int len; | ||
387 | int error = 0; | 420 | int error = 0; |
388 | struct xfs_trans_header thdr; | 421 | struct xfs_trans_header thdr; |
389 | struct xfs_log_iovec lhdr; | 422 | struct xfs_log_iovec lhdr; |
@@ -428,12 +461,9 @@ xlog_cil_push( | |||
428 | * side which is currently locked out by the flush lock. | 461 | * side which is currently locked out by the flush lock. |
429 | */ | 462 | */ |
430 | lv = NULL; | 463 | lv = NULL; |
431 | num_lv = 0; | ||
432 | num_iovecs = 0; | 464 | num_iovecs = 0; |
433 | len = 0; | ||
434 | while (!list_empty(&cil->xc_cil)) { | 465 | while (!list_empty(&cil->xc_cil)) { |
435 | struct xfs_log_item *item; | 466 | struct xfs_log_item *item; |
436 | int i; | ||
437 | 467 | ||
438 | item = list_first_entry(&cil->xc_cil, | 468 | item = list_first_entry(&cil->xc_cil, |
439 | struct xfs_log_item, li_cil); | 469 | struct xfs_log_item, li_cil); |
@@ -444,11 +474,7 @@ xlog_cil_push( | |||
444 | lv->lv_next = item->li_lv; | 474 | lv->lv_next = item->li_lv; |
445 | lv = item->li_lv; | 475 | lv = item->li_lv; |
446 | item->li_lv = NULL; | 476 | item->li_lv = NULL; |
447 | |||
448 | num_lv++; | ||
449 | num_iovecs += lv->lv_niovecs; | 477 | num_iovecs += lv->lv_niovecs; |
450 | for (i = 0; i < lv->lv_niovecs; i++) | ||
451 | len += lv->lv_iovecp[i].i_len; | ||
452 | } | 478 | } |
453 | 479 | ||
454 | /* | 480 | /* |
@@ -701,6 +727,7 @@ xfs_log_commit_cil( | |||
701 | if (commit_lsn) | 727 | if (commit_lsn) |
702 | *commit_lsn = log->l_cilp->xc_ctx->sequence; | 728 | *commit_lsn = log->l_cilp->xc_ctx->sequence; |
703 | 729 | ||
730 | /* xlog_cil_insert_items() destroys log_vector list */ | ||
704 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); | 731 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); |
705 | 732 | ||
706 | /* check we didn't blow the reservation */ | 733 | /* check we didn't blow the reservation */ |