aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-09-24 04:14:13 -0400
committerAlex Elder <aelder@sgi.com>2010-10-18 16:07:42 -0400
commitd1583a3833290ab9f8b13a064acbb5e508c59f60 (patch)
tree8d349b284c80bc42e6e0ea5bb60824f723be617d /fs/xfs
parent9c169915ad374cd9efb1556943b2074ec07e1749 (diff)
xfs: reduce the number of CIL lock round trips during commit
When committing a transaction, we do a CIL state lock round trip on every single log vector we insert into the CIL. This is resulting in the lock being as hot as the inode and dcache locks on 8-way create workloads. Rework the insertion loops to bring the number of lock round trips to one per transaction for log vectors, and one more for the busy extents. Also change the allocation of the log vector buffer not to zero it, as we copy over the entire allocated buffer anyway. This patch also includes a structural cleanup to the CIL item insertion provided by Christoph Hellwig. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_log_cil.c232
1 files changed, 127 insertions, 105 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7e206fc1fa36..23d6ceb5e97b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -146,102 +146,6 @@ xlog_cil_init_post_recovery(
146} 146}
147 147
148/* 148/*
149 * Insert the log item into the CIL and calculate the difference in space
150 * consumed by the item. Add the space to the checkpoint ticket and calculate
151 * if the change requires additional log metadata. If it does, take that space
152 * as well. Remove the amount of space we addded to the checkpoint ticket from
153 * the current transaction ticket so that the accounting works out correctly.
154 *
155 * If this is the first time the item is being placed into the CIL in this
156 * context, pin it so it can't be written to disk until the CIL is flushed to
157 * the iclog and the iclog written to disk.
158 */
159static void
160xlog_cil_insert(
161 struct log *log,
162 struct xlog_ticket *ticket,
163 struct xfs_log_item *item,
164 struct xfs_log_vec *lv)
165{
166 struct xfs_cil *cil = log->l_cilp;
167 struct xfs_log_vec *old = lv->lv_item->li_lv;
168 struct xfs_cil_ctx *ctx = cil->xc_ctx;
169 int len;
170 int diff_iovecs;
171 int iclog_space;
172
173 if (old) {
174 /* existing lv on log item, space used is a delta */
175 ASSERT(!list_empty(&item->li_cil));
176 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
177
178 len = lv->lv_buf_len - old->lv_buf_len;
179 diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
180 kmem_free(old->lv_buf);
181 kmem_free(old);
182 } else {
183 /* new lv, must pin the log item */
184 ASSERT(!lv->lv_item->li_lv);
185 ASSERT(list_empty(&item->li_cil));
186
187 len = lv->lv_buf_len;
188 diff_iovecs = lv->lv_niovecs;
189 IOP_PIN(lv->lv_item);
190
191 }
192 len += diff_iovecs * sizeof(xlog_op_header_t);
193
194 /* attach new log vector to log item */
195 lv->lv_item->li_lv = lv;
196
197 spin_lock(&cil->xc_cil_lock);
198 list_move_tail(&item->li_cil, &cil->xc_cil);
199 ctx->nvecs += diff_iovecs;
200
201 /*
202 * If this is the first time the item is being committed to the CIL,
203 * store the sequence number on the log item so we can tell
204 * in future commits whether this is the first checkpoint the item is
205 * being committed into.
206 */
207 if (!item->li_seq)
208 item->li_seq = ctx->sequence;
209
210 /*
211 * Now transfer enough transaction reservation to the context ticket
212 * for the checkpoint. The context ticket is special - the unit
213 * reservation has to grow as well as the current reservation as we
214 * steal from tickets so we can correctly determine the space used
215 * during the transaction commit.
216 */
217 if (ctx->ticket->t_curr_res == 0) {
218 /* first commit in checkpoint, steal the header reservation */
219 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
220 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
221 ticket->t_curr_res -= ctx->ticket->t_unit_res;
222 }
223
224 /* do we need space for more log record headers? */
225 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
226 if (len > 0 && (ctx->space_used / iclog_space !=
227 (ctx->space_used + len) / iclog_space)) {
228 int hdrs;
229
230 hdrs = (len + iclog_space - 1) / iclog_space;
231 /* need to take into account split region headers, too */
232 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
233 ctx->ticket->t_unit_res += hdrs;
234 ctx->ticket->t_curr_res += hdrs;
235 ticket->t_curr_res -= hdrs;
236 ASSERT(ticket->t_curr_res >= len);
237 }
238 ticket->t_curr_res -= len;
239 ctx->space_used += len;
240
241 spin_unlock(&cil->xc_cil_lock);
242}
243
244/*
245 * Format log item into a flat buffers 149 * Format log item into a flat buffers
246 * 150 *
247 * For delayed logging, we need to hold a formatted buffer containing all the 151 * For delayed logging, we need to hold a formatted buffer containing all the
@@ -286,7 +190,7 @@ xlog_cil_format_items(
286 len += lv->lv_iovecp[index].i_len; 190 len += lv->lv_iovecp[index].i_len;
287 191
288 lv->lv_buf_len = len; 192 lv->lv_buf_len = len;
289 lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); 193 lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
290 ptr = lv->lv_buf; 194 ptr = lv->lv_buf;
291 195
292 for (index = 0; index < lv->lv_niovecs; index++) { 196 for (index = 0; index < lv->lv_niovecs; index++) {
@@ -300,21 +204,136 @@ xlog_cil_format_items(
300 } 204 }
301} 205}
302 206
207/*
208 * Prepare the log item for insertion into the CIL. Calculate the difference in
209 * log space and vectors it will consume, and if it is a new item pin it as
210 * well.
211 */
212STATIC void
213xfs_cil_prepare_item(
214 struct log *log,
215 struct xfs_log_vec *lv,
216 int *len,
217 int *diff_iovecs)
218{
219 struct xfs_log_vec *old = lv->lv_item->li_lv;
220
221 if (old) {
222 /* existing lv on log item, space used is a delta */
223 ASSERT(!list_empty(&lv->lv_item->li_cil));
224 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
225
226 *len += lv->lv_buf_len - old->lv_buf_len;
227 *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
228 kmem_free(old->lv_buf);
229 kmem_free(old);
230 } else {
231 /* new lv, must pin the log item */
232 ASSERT(!lv->lv_item->li_lv);
233 ASSERT(list_empty(&lv->lv_item->li_cil));
234
235 *len += lv->lv_buf_len;
236 *diff_iovecs += lv->lv_niovecs;
237 IOP_PIN(lv->lv_item);
238
239 }
240
241 /* attach new log vector to log item */
242 lv->lv_item->li_lv = lv;
243
244 /*
245 * If this is the first time the item is being committed to the
246 * CIL, store the sequence number on the log item so we can
247 * tell in future commits whether this is the first checkpoint
248 * the item is being committed into.
249 */
250 if (!lv->lv_item->li_seq)
251 lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
252}
253
254/*
255 * Insert the log items into the CIL and calculate the difference in space
256 * consumed by the item. Add the space to the checkpoint ticket and calculate
257 * if the change requires additional log metadata. If it does, take that space
258 * as well. Remove the amount of space we added to the checkpoint ticket from
259 * the current transaction ticket so that the accounting works out correctly.
260 */
303static void 261static void
304xlog_cil_insert_items( 262xlog_cil_insert_items(
305 struct log *log, 263 struct log *log,
306 struct xfs_log_vec *log_vector, 264 struct xfs_log_vec *log_vector,
307 struct xlog_ticket *ticket, 265 struct xlog_ticket *ticket)
308 xfs_lsn_t *start_lsn)
309{ 266{
310 struct xfs_log_vec *lv; 267 struct xfs_cil *cil = log->l_cilp;
311 268 struct xfs_cil_ctx *ctx = cil->xc_ctx;
312 if (start_lsn) 269 struct xfs_log_vec *lv;
313 *start_lsn = log->l_cilp->xc_ctx->sequence; 270 int len = 0;
271 int diff_iovecs = 0;
272 int iclog_space;
314 273
315 ASSERT(log_vector); 274 ASSERT(log_vector);
275
276 /*
277 * Do all the accounting aggregation and switching of log vectors
278 * around in a separate loop to the insertion of items into the CIL.
279 * Then we can do a separate loop to update the CIL within a single
280 * lock/unlock pair. This reduces the number of round trips on the CIL
281 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
282 * hold time for the transaction commit.
283 *
284 * If this is the first time the item is being placed into the CIL in
285 * this context, pin it so it can't be written to disk until the CIL is
286 * flushed to the iclog and the iclog written to disk.
287 *
288 * We can do this safely because the context can't checkpoint until we
289 * are done so it doesn't matter exactly how we update the CIL.
290 */
291 for (lv = log_vector; lv; lv = lv->lv_next)
292 xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
293
294 /* account for space used by new iovec headers */
295 len += diff_iovecs * sizeof(xlog_op_header_t);
296
297 spin_lock(&cil->xc_cil_lock);
298
299 /* move the items to the tail of the CIL */
316 for (lv = log_vector; lv; lv = lv->lv_next) 300 for (lv = log_vector; lv; lv = lv->lv_next)
317 xlog_cil_insert(log, ticket, lv->lv_item, lv); 301 list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
302
303 ctx->nvecs += diff_iovecs;
304
305 /*
306 * Now transfer enough transaction reservation to the context ticket
307 * for the checkpoint. The context ticket is special - the unit
308 * reservation has to grow as well as the current reservation as we
309 * steal from tickets so we can correctly determine the space used
310 * during the transaction commit.
311 */
312 if (ctx->ticket->t_curr_res == 0) {
313 /* first commit in checkpoint, steal the header reservation */
314 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
315 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
316 ticket->t_curr_res -= ctx->ticket->t_unit_res;
317 }
318
319 /* do we need space for more log record headers? */
320 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
321 if (len > 0 && (ctx->space_used / iclog_space !=
322 (ctx->space_used + len) / iclog_space)) {
323 int hdrs;
324
325 hdrs = (len + iclog_space - 1) / iclog_space;
326 /* need to take into account split region headers, too */
327 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
328 ctx->ticket->t_unit_res += hdrs;
329 ctx->ticket->t_curr_res += hdrs;
330 ticket->t_curr_res -= hdrs;
331 ASSERT(ticket->t_curr_res >= len);
332 }
333 ticket->t_curr_res -= len;
334 ctx->space_used += len;
335
336 spin_unlock(&cil->xc_cil_lock);
318} 337}
319 338
320static void 339static void
@@ -638,7 +657,10 @@ xfs_log_commit_cil(
638 657
639 /* lock out background commit */ 658 /* lock out background commit */
640 down_read(&log->l_cilp->xc_ctx_lock); 659 down_read(&log->l_cilp->xc_ctx_lock);
641 xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); 660 if (commit_lsn)
661 *commit_lsn = log->l_cilp->xc_ctx->sequence;
662
663 xlog_cil_insert_items(log, log_vector, tp->t_ticket);
642 664
643 /* check we didn't blow the reservation */ 665 /* check we didn't blow the reservation */
644 if (tp->t_ticket->t_curr_res < 0) 666 if (tp->t_ticket->t_curr_res < 0)