aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c10
-rw-r--r--fs/xfs/xfs_log.c64
-rw-r--r--fs/xfs/xfs_log.h9
-rw-r--r--fs/xfs/xfs_log_cil.c659
-rw-r--r--fs/xfs/xfs_log_priv.h71
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_trans.c103
-rw-r--r--fs/xfs/xfs_trans.h8
-rw-r--r--fs/xfs/xfs_trans_item.c5
-rw-r--r--fs/xfs/xfs_trans_priv.h11
11 files changed, 912 insertions, 30 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index b4769e40e8bc..c8fb13f83b3f 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -77,6 +77,7 @@ xfs-y += xfs_alloc.o \
77 xfs_itable.o \ 77 xfs_itable.o \
78 xfs_dfrag.o \ 78 xfs_dfrag.o \
79 xfs_log.o \ 79 xfs_log.o \
80 xfs_log_cil.o \
80 xfs_log_recover.o \ 81 xfs_log_recover.o \
81 xfs_mount.o \ 82 xfs_mount.o \
82 xfs_mru_cache.o \ 83 xfs_mru_cache.o \
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a8ea03afe2e3..775de2b5727c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -119,6 +119,8 @@ mempool_t *xfs_ioend_pool;
119#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ 119#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
120#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ 120#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
121#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ 121#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
122#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */
123#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */
122 124
123/* 125/*
124 * Table driven mount option parser. 126 * Table driven mount option parser.
@@ -374,6 +376,13 @@ xfs_parseargs(
374 mp->m_flags |= XFS_MOUNT_DMAPI; 376 mp->m_flags |= XFS_MOUNT_DMAPI;
375 } else if (!strcmp(this_char, MNTOPT_DMI)) { 377 } else if (!strcmp(this_char, MNTOPT_DMI)) {
376 mp->m_flags |= XFS_MOUNT_DMAPI; 378 mp->m_flags |= XFS_MOUNT_DMAPI;
379 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
380 mp->m_flags |= XFS_MOUNT_DELAYLOG;
381 cmn_err(CE_WARN,
382 "Enabling EXPERIMENTAL delayed logging feature "
383 "- use at your own risk.\n");
384 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
385 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
377 } else if (!strcmp(this_char, "ihashsize")) { 386 } else if (!strcmp(this_char, "ihashsize")) {
378 cmn_err(CE_WARN, 387 cmn_err(CE_WARN,
379 "XFS: ihashsize no longer used, option is deprecated."); 388 "XFS: ihashsize no longer used, option is deprecated.");
@@ -535,6 +544,7 @@ xfs_showargs(
535 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 544 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
536 { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, 545 { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
537 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 546 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
547 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
538 { 0, NULL } 548 { 0, NULL }
539 }; 549 };
540 static struct proc_xfs_info xfs_info_unset[] = { 550 static struct proc_xfs_info xfs_info_unset[] = {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 19d0c5f73e24..027ebfe20677 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -54,9 +54,6 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
54STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); 54STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
55STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 55STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
56STATIC void xlog_dealloc_log(xlog_t *log); 56STATIC void xlog_dealloc_log(xlog_t *log);
57STATIC int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
58 struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
59 xlog_in_core_t **commit_iclog, uint flags);
60 57
61/* local state machine functions */ 58/* local state machine functions */
62STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); 59STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
@@ -86,12 +83,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log,
86STATIC void xlog_ungrant_log_space(xlog_t *log, 83STATIC void xlog_ungrant_log_space(xlog_t *log,
87 xlog_ticket_t *ticket); 84 xlog_ticket_t *ticket);
88 85
89
90/* local ticket functions */
91STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, int unit_bytes, int count,
92 char clientid, uint flags,
93 int alloc_flags);
94
95#if defined(DEBUG) 86#if defined(DEBUG)
96STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); 87STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
97STATIC void xlog_verify_grant_head(xlog_t *log, int equals); 88STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
@@ -460,6 +451,13 @@ xfs_log_mount(
460 /* Normal transactions can now occur */ 451 /* Normal transactions can now occur */
461 mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 452 mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
462 453
454 /*
455 * Now the log has been fully initialised and we know where our
456 * space grant counters are, we can initialise the permanent ticket
457 * needed for delayed logging to work.
458 */
459 xlog_cil_init_post_recovery(mp->m_log);
460
463 return 0; 461 return 0;
464 462
465out_destroy_ail: 463out_destroy_ail:
@@ -666,6 +664,10 @@ xfs_log_item_init(
666 item->li_ailp = mp->m_ail; 664 item->li_ailp = mp->m_ail;
667 item->li_type = type; 665 item->li_type = type;
668 item->li_ops = ops; 666 item->li_ops = ops;
667 item->li_lv = NULL;
668
669 INIT_LIST_HEAD(&item->li_ail);
670 INIT_LIST_HEAD(&item->li_cil);
669} 671}
670 672
671/* 673/*
@@ -1176,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1176 *iclogp = log->l_iclog; /* complete ring */ 1178 *iclogp = log->l_iclog; /* complete ring */
1177 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ 1179 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
1178 1180
1181 error = xlog_cil_init(log);
1182 if (error)
1183 goto out_free_iclog;
1179 return log; 1184 return log;
1180 1185
1181out_free_iclog: 1186out_free_iclog:
@@ -1502,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log)
1502 xlog_in_core_t *iclog, *next_iclog; 1507 xlog_in_core_t *iclog, *next_iclog;
1503 int i; 1508 int i;
1504 1509
1510 xlog_cil_destroy(log);
1511
1505 iclog = log->l_iclog; 1512 iclog = log->l_iclog;
1506 for (i=0; i<log->l_iclog_bufs; i++) { 1513 for (i=0; i<log->l_iclog_bufs; i++) {
1507 sv_destroy(&iclog->ic_force_wait); 1514 sv_destroy(&iclog->ic_force_wait);
@@ -1544,8 +1551,10 @@ xlog_state_finish_copy(xlog_t *log,
1544 * print out info relating to regions written which consume 1551 * print out info relating to regions written which consume
1545 * the reservation 1552 * the reservation
1546 */ 1553 */
1547STATIC void 1554void
1548xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) 1555xlog_print_tic_res(
1556 struct xfs_mount *mp,
1557 struct xlog_ticket *ticket)
1549{ 1558{
1550 uint i; 1559 uint i;
1551 uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); 1560 uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
@@ -1877,7 +1886,7 @@ xlog_write_copy_finish(
1877 * we don't update ic_offset until the end when we know exactly how many 1886 * we don't update ic_offset until the end when we know exactly how many
1878 * bytes have been written out. 1887 * bytes have been written out.
1879 */ 1888 */
1880STATIC int 1889int
1881xlog_write( 1890xlog_write(
1882 struct log *log, 1891 struct log *log,
1883 struct xfs_log_vec *log_vector, 1892 struct xfs_log_vec *log_vector,
@@ -1901,9 +1910,26 @@ xlog_write(
1901 *start_lsn = 0; 1910 *start_lsn = 0;
1902 1911
1903 len = xlog_write_calc_vec_length(ticket, log_vector); 1912 len = xlog_write_calc_vec_length(ticket, log_vector);
1904 if (ticket->t_curr_res < len) 1913 if (log->l_cilp) {
1914 /*
1915 * Region headers and bytes are already accounted for.
1916 * We only need to take into account start records and
1917 * split regions in this function.
1918 */
1919 if (ticket->t_flags & XLOG_TIC_INITED)
1920 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1921
1922 /*
1923 * Commit record headers need to be accounted for. These
1924 * come in as separate writes so are easy to detect.
1925 */
1926 if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
1927 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1928 } else
1929 ticket->t_curr_res -= len;
1930
1931 if (ticket->t_curr_res < 0)
1905 xlog_print_tic_res(log->l_mp, ticket); 1932 xlog_print_tic_res(log->l_mp, ticket);
1906 ticket->t_curr_res -= len;
1907 1933
1908 index = 0; 1934 index = 0;
1909 lv = log_vector; 1935 lv = log_vector;
@@ -2999,6 +3025,8 @@ _xfs_log_force(
2999 3025
3000 XFS_STATS_INC(xs_log_force); 3026 XFS_STATS_INC(xs_log_force);
3001 3027
3028 xlog_cil_push(log, 1);
3029
3002 spin_lock(&log->l_icloglock); 3030 spin_lock(&log->l_icloglock);
3003 3031
3004 iclog = log->l_iclog; 3032 iclog = log->l_iclog;
@@ -3148,6 +3176,12 @@ _xfs_log_force_lsn(
3148 3176
3149 XFS_STATS_INC(xs_log_force); 3177 XFS_STATS_INC(xs_log_force);
3150 3178
3179 if (log->l_cilp) {
3180 lsn = xlog_cil_push_lsn(log, lsn);
3181 if (lsn == NULLCOMMITLSN)
3182 return 0;
3183 }
3184
3151try_again: 3185try_again:
3152 spin_lock(&log->l_icloglock); 3186 spin_lock(&log->l_icloglock);
3153 iclog = log->l_iclog; 3187 iclog = log->l_iclog;
@@ -3322,7 +3356,7 @@ xfs_log_get_trans_ident(
3322/* 3356/*
3323 * Allocate and initialise a new log ticket. 3357 * Allocate and initialise a new log ticket.
3324 */ 3358 */
3325STATIC xlog_ticket_t * 3359xlog_ticket_t *
3326xlog_ticket_alloc( 3360xlog_ticket_alloc(
3327 struct log *log, 3361 struct log *log,
3328 int unit_bytes, 3362 int unit_bytes,
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 05f205aac913..4a0c57432e8f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -113,6 +113,9 @@ struct xfs_log_vec {
113 struct xfs_log_vec *lv_next; /* next lv in build list */ 113 struct xfs_log_vec *lv_next; /* next lv in build list */
114 int lv_niovecs; /* number of iovecs in lv */ 114 int lv_niovecs; /* number of iovecs in lv */
115 struct xfs_log_iovec *lv_iovecp; /* iovec array */ 115 struct xfs_log_iovec *lv_iovecp; /* iovec array */
116 struct xfs_log_item *lv_item; /* owner */
117 char *lv_buf; /* formatted buffer */
118 int lv_buf_len; /* size of formatted buffer */
116}; 119};
117 120
118/* 121/*
@@ -187,11 +190,15 @@ int xfs_log_need_covered(struct xfs_mount *mp);
187 190
188void xlog_iodone(struct xfs_buf *); 191void xlog_iodone(struct xfs_buf *);
189 192
190struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); 193struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
191void xfs_log_ticket_put(struct xlog_ticket *ticket); 194void xfs_log_ticket_put(struct xlog_ticket *ticket);
192 195
193xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); 196xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
194 197
198int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
199 struct xfs_log_vec *log_vector,
200 xfs_lsn_t *commit_lsn, int flags);
201
195#endif 202#endif
196 203
197 204
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
new file mode 100644
index 000000000000..53abd6b0a333
--- /dev/null
+++ b/fs/xfs/xfs_log_cil.c
@@ -0,0 +1,659 @@
1/*
2 * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_trans_priv.h"
26#include "xfs_log_priv.h"
27#include "xfs_sb.h"
28#include "xfs_ag.h"
29#include "xfs_dir2.h"
30#include "xfs_dmapi.h"
31#include "xfs_mount.h"
32#include "xfs_error.h"
33#include "xfs_alloc.h"
34
35/*
36 * Perform initial CIL structure initialisation. If the CIL is not
37 * enabled in this filesystem, ensure the log->l_cilp is null so
38 * we can check this conditional to determine if we are doing delayed
39 * logging or not.
40 */
41int
42xlog_cil_init(
43 struct log *log)
44{
45 struct xfs_cil *cil;
46 struct xfs_cil_ctx *ctx;
47
48 log->l_cilp = NULL;
49 if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
50 return 0;
51
52 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
53 if (!cil)
54 return ENOMEM;
55
56 ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
57 if (!ctx) {
58 kmem_free(cil);
59 return ENOMEM;
60 }
61
62 INIT_LIST_HEAD(&cil->xc_cil);
63 INIT_LIST_HEAD(&cil->xc_committing);
64 spin_lock_init(&cil->xc_cil_lock);
65 init_rwsem(&cil->xc_ctx_lock);
66 sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
67
68 INIT_LIST_HEAD(&ctx->committing);
69 INIT_LIST_HEAD(&ctx->busy_extents);
70 ctx->sequence = 1;
71 ctx->cil = cil;
72 cil->xc_ctx = ctx;
73
74 cil->xc_log = log;
75 log->l_cilp = cil;
76 return 0;
77}
78
79void
80xlog_cil_destroy(
81 struct log *log)
82{
83 if (!log->l_cilp)
84 return;
85
86 if (log->l_cilp->xc_ctx) {
87 if (log->l_cilp->xc_ctx->ticket)
88 xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
89 kmem_free(log->l_cilp->xc_ctx);
90 }
91
92 ASSERT(list_empty(&log->l_cilp->xc_cil));
93 kmem_free(log->l_cilp);
94}
95
96/*
97 * Allocate a new ticket. Failing to get a new ticket makes it really hard to
98 * recover, so we don't allow failure here. Also, we allocate in a context that
99 * we don't want to be issuing transactions from, so we need to tell the
100 * allocation code this as well.
101 *
102 * We don't reserve any space for the ticket - we are going to steal whatever
103 * space we require from transactions as they commit. To ensure we reserve all
104 * the space required, we need to set the current reservation of the ticket to
105 * zero so that we know to steal the initial transaction overhead from the
106 * first transaction commit.
107 */
108static struct xlog_ticket *
109xlog_cil_ticket_alloc(
110 struct log *log)
111{
112 struct xlog_ticket *tic;
113
114 tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
115 KM_SLEEP|KM_NOFS);
116 tic->t_trans_type = XFS_TRANS_CHECKPOINT;
117
118 /*
119 * set the current reservation to zero so we know to steal the basic
120 * transaction overhead reservation from the first transaction commit.
121 */
122 tic->t_curr_res = 0;
123 return tic;
124}
125
126/*
127 * After the first stage of log recovery is done, we know where the head and
128 * tail of the log are. We need this log initialisation done before we can
129 * initialise the first CIL checkpoint context.
130 *
131 * Here we allocate a log ticket to track space usage during a CIL push. This
132 * ticket is passed to xlog_write() directly so that we don't slowly leak log
133 * space by failing to account for space used by log headers and additional
134 * region headers for split regions.
135 */
136void
137xlog_cil_init_post_recovery(
138 struct log *log)
139{
140 if (!log->l_cilp)
141 return;
142
143 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
144 log->l_cilp->xc_ctx->sequence = 1;
145 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
146 log->l_curr_block);
147}
148
149/*
150 * Insert the log item into the CIL and calculate the difference in space
151 * consumed by the item. Add the space to the checkpoint ticket and calculate
152 * if the change requires additional log metadata. If it does, take that space
153 * as well. Remove the amount of space we addded to the checkpoint ticket from
154 * the current transaction ticket so that the accounting works out correctly.
155 *
156 * If this is the first time the item is being placed into the CIL in this
157 * context, pin it so it can't be written to disk until the CIL is flushed to
158 * the iclog and the iclog written to disk.
159 */
160static void
161xlog_cil_insert(
162 struct log *log,
163 struct xlog_ticket *ticket,
164 struct xfs_log_item *item,
165 struct xfs_log_vec *lv)
166{
167 struct xfs_cil *cil = log->l_cilp;
168 struct xfs_log_vec *old = lv->lv_item->li_lv;
169 struct xfs_cil_ctx *ctx = cil->xc_ctx;
170 int len;
171 int diff_iovecs;
172 int iclog_space;
173
174 if (old) {
175 /* existing lv on log item, space used is a delta */
176 ASSERT(!list_empty(&item->li_cil));
177 ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
178
179 len = lv->lv_buf_len - old->lv_buf_len;
180 diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
181 kmem_free(old->lv_buf);
182 kmem_free(old);
183 } else {
184 /* new lv, must pin the log item */
185 ASSERT(!lv->lv_item->li_lv);
186 ASSERT(list_empty(&item->li_cil));
187
188 len = lv->lv_buf_len;
189 diff_iovecs = lv->lv_niovecs;
190 IOP_PIN(lv->lv_item);
191
192 }
193 len += diff_iovecs * sizeof(xlog_op_header_t);
194
195 /* attach new log vector to log item */
196 lv->lv_item->li_lv = lv;
197
198 spin_lock(&cil->xc_cil_lock);
199 list_move_tail(&item->li_cil, &cil->xc_cil);
200 ctx->nvecs += diff_iovecs;
201
202 /*
203 * Now transfer enough transaction reservation to the context ticket
204 * for the checkpoint. The context ticket is special - the unit
205 * reservation has to grow as well as the current reservation as we
206 * steal from tickets so we can correctly determine the space used
207 * during the transaction commit.
208 */
209 if (ctx->ticket->t_curr_res == 0) {
210 /* first commit in checkpoint, steal the header reservation */
211 ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
212 ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
213 ticket->t_curr_res -= ctx->ticket->t_unit_res;
214 }
215
216 /* do we need space for more log record headers? */
217 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
218 if (len > 0 && (ctx->space_used / iclog_space !=
219 (ctx->space_used + len) / iclog_space)) {
220 int hdrs;
221
222 hdrs = (len + iclog_space - 1) / iclog_space;
223 /* need to take into account split region headers, too */
224 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
225 ctx->ticket->t_unit_res += hdrs;
226 ctx->ticket->t_curr_res += hdrs;
227 ticket->t_curr_res -= hdrs;
228 ASSERT(ticket->t_curr_res >= len);
229 }
230 ticket->t_curr_res -= len;
231 ctx->space_used += len;
232
233 spin_unlock(&cil->xc_cil_lock);
234}
235
236/*
237 * Format log item into a flat buffers
238 *
239 * For delayed logging, we need to hold a formatted buffer containing all the
240 * changes on the log item. This enables us to relog the item in memory and
241 * write it out asynchronously without needing to relock the object that was
242 * modified at the time it gets written into the iclog.
243 *
244 * This function builds a vector for the changes in each log item in the
245 * transaction. It then works out the length of the buffer needed for each log
246 * item, allocates them and formats the vector for the item into the buffer.
247 * The buffer is then attached to the log item are then inserted into the
248 * Committed Item List for tracking until the next checkpoint is written out.
249 *
250 * We don't set up region headers during this process; we simply copy the
251 * regions into the flat buffer. We can do this because we still have to do a
252 * formatting step to write the regions into the iclog buffer. Writing the
253 * ophdrs during the iclog write means that we can support splitting large
254 * regions across iclog boundares without needing a change in the format of the
255 * item/region encapsulation.
256 *
257 * Hence what we need to do now is change the rewrite the vector array to point
258 * to the copied region inside the buffer we just allocated. This allows us to
259 * format the regions into the iclog as though they are being formatted
260 * directly out of the objects themselves.
261 */
262static void
263xlog_cil_format_items(
264 struct log *log,
265 struct xfs_log_vec *log_vector,
266 struct xlog_ticket *ticket,
267 xfs_lsn_t *start_lsn)
268{
269 struct xfs_log_vec *lv;
270
271 if (start_lsn)
272 *start_lsn = log->l_cilp->xc_ctx->sequence;
273
274 ASSERT(log_vector);
275 for (lv = log_vector; lv; lv = lv->lv_next) {
276 void *ptr;
277 int index;
278 int len = 0;
279
280 /* build the vector array and calculate it's length */
281 IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
282 for (index = 0; index < lv->lv_niovecs; index++)
283 len += lv->lv_iovecp[index].i_len;
284
285 lv->lv_buf_len = len;
286 lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
287 ptr = lv->lv_buf;
288
289 for (index = 0; index < lv->lv_niovecs; index++) {
290 struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
291
292 memcpy(ptr, vec->i_addr, vec->i_len);
293 vec->i_addr = ptr;
294 ptr += vec->i_len;
295 }
296 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
297
298 xlog_cil_insert(log, ticket, lv->lv_item, lv);
299 }
300}
301
302static void
303xlog_cil_free_logvec(
304 struct xfs_log_vec *log_vector)
305{
306 struct xfs_log_vec *lv;
307
308 for (lv = log_vector; lv; ) {
309 struct xfs_log_vec *next = lv->lv_next;
310 kmem_free(lv->lv_buf);
311 kmem_free(lv);
312 lv = next;
313 }
314}
315
316/*
317 * Commit a transaction with the given vector to the Committed Item List.
318 *
319 * To do this, we need to format the item, pin it in memory if required and
320 * account for the space used by the transaction. Once we have done that we
321 * need to release the unused reservation for the transaction, attach the
322 * transaction to the checkpoint context so we carry the busy extents through
323 * to checkpoint completion, and then unlock all the items in the transaction.
324 *
325 * For more specific information about the order of operations in
326 * xfs_log_commit_cil() please refer to the comments in
327 * xfs_trans_commit_iclog().
328 */
329int
330xfs_log_commit_cil(
331 struct xfs_mount *mp,
332 struct xfs_trans *tp,
333 struct xfs_log_vec *log_vector,
334 xfs_lsn_t *commit_lsn,
335 int flags)
336{
337 struct log *log = mp->m_log;
338 int log_flags = 0;
339
340 if (flags & XFS_TRANS_RELEASE_LOG_RES)
341 log_flags = XFS_LOG_REL_PERM_RESERV;
342
343 if (XLOG_FORCED_SHUTDOWN(log)) {
344 xlog_cil_free_logvec(log_vector);
345 return XFS_ERROR(EIO);
346 }
347
348 /* lock out background commit */
349 down_read(&log->l_cilp->xc_ctx_lock);
350 xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
351
352 /* check we didn't blow the reservation */
353 if (tp->t_ticket->t_curr_res < 0)
354 xlog_print_tic_res(log->l_mp, tp->t_ticket);
355
356 /* attach the transaction to the CIL if it has any busy extents */
357 if (!list_empty(&tp->t_busy)) {
358 spin_lock(&log->l_cilp->xc_cil_lock);
359 list_splice_init(&tp->t_busy,
360 &log->l_cilp->xc_ctx->busy_extents);
361 spin_unlock(&log->l_cilp->xc_cil_lock);
362 }
363
364 tp->t_commit_lsn = *commit_lsn;
365 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
366 xfs_trans_unreserve_and_mod_sb(tp);
367
368 /* background commit is allowed again */
369 up_read(&log->l_cilp->xc_ctx_lock);
370 return 0;
371}
372
373/*
374 * Mark all items committed and clear busy extents. We free the log vector
375 * chains in a separate pass so that we unpin the log items as quickly as
376 * possible.
377 */
378static void
379xlog_cil_committed(
380 void *args,
381 int abort)
382{
383 struct xfs_cil_ctx *ctx = args;
384 struct xfs_log_vec *lv;
385 int abortflag = abort ? XFS_LI_ABORTED : 0;
386 struct xfs_busy_extent *busyp, *n;
387
388 /* unpin all the log items */
389 for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
390 xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
391 abortflag);
392 }
393
394 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
395 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
396
397 spin_lock(&ctx->cil->xc_cil_lock);
398 list_del(&ctx->committing);
399 spin_unlock(&ctx->cil->xc_cil_lock);
400
401 xlog_cil_free_logvec(ctx->lv_chain);
402 kmem_free(ctx);
403}
404
405/*
406 * Push the Committed Item List to the log. If the push_now flag is not set,
407 * then it is a background flush and so we can chose to ignore it.
408 */
409int
410xlog_cil_push(
411 struct log *log,
412 int push_now)
413{
414 struct xfs_cil *cil = log->l_cilp;
415 struct xfs_log_vec *lv;
416 struct xfs_cil_ctx *ctx;
417 struct xfs_cil_ctx *new_ctx;
418 struct xlog_in_core *commit_iclog;
419 struct xlog_ticket *tic;
420 int num_lv;
421 int num_iovecs;
422 int len;
423 int error = 0;
424 struct xfs_trans_header thdr;
425 struct xfs_log_iovec lhdr;
426 struct xfs_log_vec lvhdr = { NULL };
427 xfs_lsn_t commit_lsn;
428
429 if (!cil)
430 return 0;
431
432 /* XXX: don't sleep for background? */
433 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
434 new_ctx->ticket = xlog_cil_ticket_alloc(log);
435
436 /* lock out transaction commit */
437 down_write(&cil->xc_ctx_lock);
438 ctx = cil->xc_ctx;
439
440 /* check if we've anything to push */
441 if (list_empty(&cil->xc_cil))
442 goto out_skip;
443
444 /*
445 * pull all the log vectors off the items in the CIL, and
446 * remove the items from the CIL. We don't need the CIL lock
447 * here because it's only needed on the transaction commit
448 * side which is currently locked out by the flush lock.
449 */
450 lv = NULL;
451 num_lv = 0;
452 num_iovecs = 0;
453 len = 0;
454 while (!list_empty(&cil->xc_cil)) {
455 struct xfs_log_item *item;
456 int i;
457
458 item = list_first_entry(&cil->xc_cil,
459 struct xfs_log_item, li_cil);
460 list_del_init(&item->li_cil);
461 if (!ctx->lv_chain)
462 ctx->lv_chain = item->li_lv;
463 else
464 lv->lv_next = item->li_lv;
465 lv = item->li_lv;
466 item->li_lv = NULL;
467
468 num_lv++;
469 num_iovecs += lv->lv_niovecs;
470 for (i = 0; i < lv->lv_niovecs; i++)
471 len += lv->lv_iovecp[i].i_len;
472 }
473
474 /*
475 * initialise the new context and attach it to the CIL. Then attach
476 * the current context to the CIL committing lsit so it can be found
477 * during log forces to extract the commit lsn of the sequence that
478 * needs to be forced.
479 */
480 INIT_LIST_HEAD(&new_ctx->committing);
481 INIT_LIST_HEAD(&new_ctx->busy_extents);
482 new_ctx->sequence = ctx->sequence + 1;
483 new_ctx->cil = cil;
484 cil->xc_ctx = new_ctx;
485
486 /*
487 * The switch is now done, so we can drop the context lock and move out
488 * of a shared context. We can't just go straight to the commit record,
489 * though - we need to synchronise with previous and future commits so
490 * that the commit records are correctly ordered in the log to ensure
491 * that we process items during log IO completion in the correct order.
492 *
493 * For example, if we get an EFI in one checkpoint and the EFD in the
494 * next (e.g. due to log forces), we do not want the checkpoint with
495 * the EFD to be committed before the checkpoint with the EFI. Hence
496 * we must strictly order the commit records of the checkpoints so
497 * that: a) the checkpoint callbacks are attached to the iclogs in the
498 * correct order; and b) the checkpoints are replayed in correct order
499 * in log recovery.
500 *
501 * Hence we need to add this context to the committing context list so
502 * that higher sequences will wait for us to write out a commit record
503 * before they do.
504 */
505 spin_lock(&cil->xc_cil_lock);
506 list_add(&ctx->committing, &cil->xc_committing);
507 spin_unlock(&cil->xc_cil_lock);
508 up_write(&cil->xc_ctx_lock);
509
510 /*
511 * Build a checkpoint transaction header and write it to the log to
512 * begin the transaction. We need to account for the space used by the
513 * transaction header here as it is not accounted for in xlog_write().
514 *
515 * The LSN we need to pass to the log items on transaction commit is
516 * the LSN reported by the first log vector write. If we use the commit
517 * record lsn then we can move the tail beyond the grant write head.
518 */
519 tic = ctx->ticket;
520 thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
521 thdr.th_type = XFS_TRANS_CHECKPOINT;
522 thdr.th_tid = tic->t_tid;
523 thdr.th_num_items = num_iovecs;
524 lhdr.i_addr = (xfs_caddr_t)&thdr;
525 lhdr.i_len = sizeof(xfs_trans_header_t);
526 lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
527 tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
528
529 lvhdr.lv_niovecs = 1;
530 lvhdr.lv_iovecp = &lhdr;
531 lvhdr.lv_next = ctx->lv_chain;
532
533 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
534 if (error)
535 goto out_abort;
536
537 /*
538 * now that we've written the checkpoint into the log, strictly
539 * order the commit records so replay will get them in the right order.
540 */
541restart:
542 spin_lock(&cil->xc_cil_lock);
543 list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
544 /*
545 * Higher sequences will wait for this one so skip them.
546 * Don't wait for own own sequence, either.
547 */
548 if (new_ctx->sequence >= ctx->sequence)
549 continue;
550 if (!new_ctx->commit_lsn) {
551 /*
552 * It is still being pushed! Wait for the push to
553 * complete, then start again from the beginning.
554 */
555 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
556 goto restart;
557 }
558 }
559 spin_unlock(&cil->xc_cil_lock);
560
561 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
562 if (error || commit_lsn == -1)
563 goto out_abort;
564
565 /* attach all the transactions w/ busy extents to iclog */
566 ctx->log_cb.cb_func = xlog_cil_committed;
567 ctx->log_cb.cb_arg = ctx;
568 error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb);
569 if (error)
570 goto out_abort;
571
572 /*
573 * now the checkpoint commit is complete and we've attached the
574 * callbacks to the iclog we can assign the commit LSN to the context
575 * and wake up anyone who is waiting for the commit to complete.
576 */
577 spin_lock(&cil->xc_cil_lock);
578 ctx->commit_lsn = commit_lsn;
579 sv_broadcast(&cil->xc_commit_wait);
580 spin_unlock(&cil->xc_cil_lock);
581
582 /* release the hounds! */
583 return xfs_log_release_iclog(log->l_mp, commit_iclog);
584
585out_skip:
586 up_write(&cil->xc_ctx_lock);
587 xfs_log_ticket_put(new_ctx->ticket);
588 kmem_free(new_ctx);
589 return 0;
590
591out_abort:
592 xlog_cil_committed(ctx, XFS_LI_ABORTED);
593 return XFS_ERROR(EIO);
594}
595
596/*
597 * Conditionally push the CIL based on the sequence passed in.
598 *
599 * We only need to push if we haven't already pushed the sequence
600 * number given. Hence the only time we will trigger a push here is
601 * if the push sequence is the same as the current context.
602 *
603 * We return the current commit lsn to allow the callers to determine if a
604 * iclog flush is necessary following this call.
605 *
606 * XXX: Initially, just push the CIL unconditionally and return whatever
607 * commit lsn is there. It'll be empty, so this is broken for now.
608 */
609xfs_lsn_t
610xlog_cil_push_lsn(
611 struct log *log,
612 xfs_lsn_t push_seq)
613{
614 struct xfs_cil *cil = log->l_cilp;
615 struct xfs_cil_ctx *ctx;
616 xfs_lsn_t commit_lsn = NULLCOMMITLSN;
617
618restart:
619 down_write(&cil->xc_ctx_lock);
620 ASSERT(push_seq <= cil->xc_ctx->sequence);
621
622 /* check to see if we need to force out the current context */
623 if (push_seq == cil->xc_ctx->sequence) {
624 up_write(&cil->xc_ctx_lock);
625 xlog_cil_push(log, 1);
626 goto restart;
627 }
628
629 /*
630 * See if we can find a previous sequence still committing.
631 * We can drop the flush lock as soon as we have the cil lock
632 * because we are now only comparing contexts protected by
633 * the cil lock.
634 *
635 * We need to wait for all previous sequence commits to complete
636 * before allowing the force of push_seq to go ahead. Hence block
637 * on commits for those as well.
638 */
639 spin_lock(&cil->xc_cil_lock);
640 up_write(&cil->xc_ctx_lock);
641 list_for_each_entry(ctx, &cil->xc_committing, committing) {
642 if (ctx->sequence > push_seq)
643 continue;
644 if (!ctx->commit_lsn) {
645 /*
646 * It is still being pushed! Wait for the push to
647 * complete, then start again from the beginning.
648 */
649 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
650 goto restart;
651 }
652 if (ctx->sequence != push_seq)
653 continue;
654 /* found it! */
655 commit_lsn = ctx->commit_lsn;
656 }
657 spin_unlock(&cil->xc_cil_lock);
658 return commit_lsn;
659}
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ac97bddcadba..48d920891b94 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -377,6 +377,54 @@ typedef struct xlog_in_core {
377} xlog_in_core_t; 377} xlog_in_core_t;
378 378
379/* 379/*
380 * The CIL context is used to aggregate per-transaction details, and is also
381 * passed to the iclog for checkpoint post-commit processing. After being
382 * passed to the iclog, another context needs to be allocated for tracking the
383 * next set of transactions to be aggregated into a checkpoint.
384 */
385struct xfs_cil;
386
387struct xfs_cil_ctx {
388 struct xfs_cil *cil;
389 xfs_lsn_t sequence; /* chkpt sequence # */
390 xfs_lsn_t start_lsn; /* first LSN of chkpt commit */
391 xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
392 struct xlog_ticket *ticket; /* chkpt ticket */
393 int nvecs; /* number of regions */
394 int space_used; /* aggregate size of regions */
395 struct list_head busy_extents; /* busy extents in chkpt */
396 struct xfs_log_vec *lv_chain; /* logvecs being pushed */
397 xfs_log_callback_t log_cb; /* completion callback hook. */
398 struct list_head committing; /* ctx committing list */
399};
400
401/*
402 * Committed Item List structure
403 *
404 * This structure is used to track log items that have been committed but not
405 * yet written into the log. It is used only when the delayed logging mount
406 * option is enabled.
407 *
408 * This structure tracks the list of committing checkpoint contexts so
409 * we can avoid the problem of having to hold out new transactions during a
410 * flush until we have the commit record LSN of the checkpoint. We can
411 * traverse the list of committing contexts in xlog_cil_push_lsn() to find a
412 * sequence match and extract the commit LSN directly from there. If the
413 * checkpoint is still in the process of committing, we can block waiting for
414 * the commit LSN to be determined as well. This should make synchronous
415 * operations almost as efficient as the old logging methods.
416 */
417struct xfs_cil {
418 struct log *xc_log;
419 struct list_head xc_cil;
420 spinlock_t xc_cil_lock;
421 struct xfs_cil_ctx *xc_ctx;
422 struct rw_semaphore xc_ctx_lock;
423 struct list_head xc_committing;
424 sv_t xc_commit_wait;
425};
426
427/*
380 * The reservation head lsn is not made up of a cycle number and block number. 428 * The reservation head lsn is not made up of a cycle number and block number.
381 * Instead, it uses a cycle number and byte number. Logs don't expect to 429 * Instead, it uses a cycle number and byte number. Logs don't expect to
382 * overflow 31 bits worth of byte offset, so using a byte number will mean 430 * overflow 31 bits worth of byte offset, so using a byte number will mean
@@ -386,6 +434,7 @@ typedef struct log {
386 /* The following fields don't need locking */ 434 /* The following fields don't need locking */
387 struct xfs_mount *l_mp; /* mount point */ 435 struct xfs_mount *l_mp; /* mount point */
388 struct xfs_ail *l_ailp; /* AIL log is working with */ 436 struct xfs_ail *l_ailp; /* AIL log is working with */
437 struct xfs_cil *l_cilp; /* CIL log is working with */
389 struct xfs_buf *l_xbuf; /* extra buffer for log 438 struct xfs_buf *l_xbuf; /* extra buffer for log
390 * wrapping */ 439 * wrapping */
391 struct xfs_buftarg *l_targ; /* buftarg of log */ 440 struct xfs_buftarg *l_targ; /* buftarg of log */
@@ -436,14 +485,17 @@ typedef struct log {
436 485
437#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 486#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
438 487
439
440/* common routines */ 488/* common routines */
441extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 489extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
442extern int xlog_recover(xlog_t *log); 490extern int xlog_recover(xlog_t *log);
443extern int xlog_recover_finish(xlog_t *log); 491extern int xlog_recover_finish(xlog_t *log);
444extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 492extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
445 493
446extern kmem_zone_t *xfs_log_ticket_zone; 494extern kmem_zone_t *xfs_log_ticket_zone;
495struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
496 int count, char client, uint xflags,
497 int alloc_flags);
498
447 499
448static inline void 500static inline void
449xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) 501xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
@@ -453,6 +505,21 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
453 *off += bytes; 505 *off += bytes;
454} 506}
455 507
508void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
509int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
510 struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
511 xlog_in_core_t **commit_iclog, uint flags);
512
513/*
514 * Committed Item List interfaces
515 */
516int xlog_cil_init(struct log *log);
517void xlog_cil_init_post_recovery(struct log *log);
518void xlog_cil_destroy(struct log *log);
519
520int xlog_cil_push(struct log *log, int push_now);
521xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
522
456/* 523/*
457 * Unmount record type is used as a pseudo transaction type for the ticket. 524 * Unmount record type is used as a pseudo transaction type for the ticket.
458 * It's value must be outside the range of XFS_TRANS_* values. 525 * It's value must be outside the range of XFS_TRANS_* values.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9ff48a16a7ee..1d2c7eed4eda 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -268,6 +268,7 @@ typedef struct xfs_mount {
268#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 268#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
269 must be synchronous except 269 must be synchronous except
270 for space allocations */ 270 for space allocations */
271#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
271#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ 272#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */
272#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 273#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
273#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 274#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 40d9595a8de2..ce558efa2ea0 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -655,7 +655,7 @@ xfs_trans_apply_sb_deltas(
655 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 655 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
656 * still need to update the incore superblock with the changes. 656 * still need to update the incore superblock with the changes.
657 */ 657 */
658STATIC void 658void
659xfs_trans_unreserve_and_mod_sb( 659xfs_trans_unreserve_and_mod_sb(
660 xfs_trans_t *tp) 660 xfs_trans_t *tp)
661{ 661{
@@ -883,7 +883,7 @@ xfs_trans_fill_vecs(
883 * they could be immediately flushed and we'd have to race with the flusher 883 * they could be immediately flushed and we'd have to race with the flusher
884 * trying to pull the item from the AIL as we add it. 884 * trying to pull the item from the AIL as we add it.
885 */ 885 */
886static void 886void
887xfs_trans_item_committed( 887xfs_trans_item_committed(
888 struct xfs_log_item *lip, 888 struct xfs_log_item *lip,
889 xfs_lsn_t commit_lsn, 889 xfs_lsn_t commit_lsn,
@@ -994,7 +994,7 @@ xfs_trans_uncommit(
994 xfs_trans_unreserve_and_mod_sb(tp); 994 xfs_trans_unreserve_and_mod_sb(tp);
995 xfs_trans_unreserve_and_mod_dquots(tp); 995 xfs_trans_unreserve_and_mod_dquots(tp);
996 996
997 xfs_trans_free_items(tp, flags); 997 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
998 xfs_trans_free(tp); 998 xfs_trans_free(tp);
999} 999}
1000 1000
@@ -1144,6 +1144,93 @@ xfs_trans_commit_iclog(
1144 return xfs_log_release_iclog(mp, commit_iclog); 1144 return xfs_log_release_iclog(mp, commit_iclog);
1145} 1145}
1146 1146
1147/*
1148 * Walk the log items and allocate log vector structures for
1149 * each item large enough to fit all the vectors they require.
1150 * Note that this format differs from the old log vector format in
1151 * that there is no transaction header in these log vectors.
1152 */
1153STATIC struct xfs_log_vec *
1154xfs_trans_alloc_log_vecs(
1155 xfs_trans_t *tp)
1156{
1157 xfs_log_item_desc_t *lidp;
1158 struct xfs_log_vec *lv = NULL;
1159 struct xfs_log_vec *ret_lv = NULL;
1160
1161 lidp = xfs_trans_first_item(tp);
1162
1163 /* Bail out if we didn't find a log item. */
1164 if (!lidp) {
1165 ASSERT(0);
1166 return NULL;
1167 }
1168
1169 while (lidp != NULL) {
1170 struct xfs_log_vec *new_lv;
1171
1172 /* Skip items which aren't dirty in this transaction. */
1173 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
1174 lidp = xfs_trans_next_item(tp, lidp);
1175 continue;
1176 }
1177
1178 /* Skip items that do not have any vectors for writing */
1179 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1180 if (!lidp->lid_size) {
1181 lidp = xfs_trans_next_item(tp, lidp);
1182 continue;
1183 }
1184
1185 new_lv = kmem_zalloc(sizeof(*new_lv) +
1186 lidp->lid_size * sizeof(struct xfs_log_iovec),
1187 KM_SLEEP);
1188
1189 /* The allocated iovec region lies beyond the log vector. */
1190 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
1191 new_lv->lv_niovecs = lidp->lid_size;
1192 new_lv->lv_item = lidp->lid_item;
1193 if (!ret_lv)
1194 ret_lv = new_lv;
1195 else
1196 lv->lv_next = new_lv;
1197 lv = new_lv;
1198 lidp = xfs_trans_next_item(tp, lidp);
1199 }
1200
1201 return ret_lv;
1202}
1203
1204static int
1205xfs_trans_commit_cil(
1206 struct xfs_mount *mp,
1207 struct xfs_trans *tp,
1208 xfs_lsn_t *commit_lsn,
1209 int flags)
1210{
1211 struct xfs_log_vec *log_vector;
1212 int error;
1213
1214 /*
1215 * Get each log item to allocate a vector structure for
1216 * the log item to pass to the log write code. The
1217 * CIL commit code will format the vector and save it away.
1218 */
1219 log_vector = xfs_trans_alloc_log_vecs(tp);
1220 if (!log_vector)
1221 return ENOMEM;
1222
1223 error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1224 if (error)
1225 return error;
1226
1227 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1228
1229 /* xfs_trans_free_items() unlocks them first */
1230 xfs_trans_free_items(tp, *commit_lsn, 0);
1231 xfs_trans_free(tp);
1232 return 0;
1233}
1147 1234
1148/* 1235/*
1149 * xfs_trans_commit 1236 * xfs_trans_commit
@@ -1204,7 +1291,11 @@ _xfs_trans_commit(
1204 xfs_trans_apply_sb_deltas(tp); 1291 xfs_trans_apply_sb_deltas(tp);
1205 xfs_trans_apply_dquot_deltas(tp); 1292 xfs_trans_apply_dquot_deltas(tp);
1206 1293
1207 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); 1294 if (mp->m_flags & XFS_MOUNT_DELAYLOG)
1295 error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
1296 else
1297 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1298
1208 if (error == ENOMEM) { 1299 if (error == ENOMEM) {
1209 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1300 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1210 error = XFS_ERROR(EIO); 1301 error = XFS_ERROR(EIO);
@@ -1242,7 +1333,7 @@ out_unreserve:
1242 error = XFS_ERROR(EIO); 1333 error = XFS_ERROR(EIO);
1243 } 1334 }
1244 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1335 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1245 xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0); 1336 xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
1246 xfs_trans_free(tp); 1337 xfs_trans_free(tp);
1247 1338
1248 XFS_STATS_INC(xs_trans_empty); 1339 XFS_STATS_INC(xs_trans_empty);
@@ -1320,7 +1411,7 @@ xfs_trans_cancel(
1320 /* mark this thread as no longer being in a transaction */ 1411 /* mark this thread as no longer being in a transaction */
1321 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1412 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1322 1413
1323 xfs_trans_free_items(tp, flags); 1414 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
1324 xfs_trans_free(tp); 1415 xfs_trans_free(tp);
1325} 1416}
1326 1417
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ff7e9e6eee84..b1ea20c66b3e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -106,7 +106,8 @@ typedef struct xfs_trans_header {
106#define XFS_TRANS_GROWFSRT_FREE 39 106#define XFS_TRANS_GROWFSRT_FREE 39
107#define XFS_TRANS_SWAPEXT 40 107#define XFS_TRANS_SWAPEXT 40
108#define XFS_TRANS_SB_COUNT 41 108#define XFS_TRANS_SB_COUNT 41
109#define XFS_TRANS_TYPE_MAX 41 109#define XFS_TRANS_CHECKPOINT 42
110#define XFS_TRANS_TYPE_MAX 42
110/* new transaction types need to be reflected in xfs_logprint(8) */ 111/* new transaction types need to be reflected in xfs_logprint(8) */
111 112
112#define XFS_TRANS_TYPES \ 113#define XFS_TRANS_TYPES \
@@ -148,6 +149,7 @@ typedef struct xfs_trans_header {
148 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ 149 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
149 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ 150 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
150 { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ 151 { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
152 { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
151 { XFS_TRANS_DUMMY1, "DUMMY1" }, \ 153 { XFS_TRANS_DUMMY1, "DUMMY1" }, \
152 { XFS_TRANS_DUMMY2, "DUMMY2" }, \ 154 { XFS_TRANS_DUMMY2, "DUMMY2" }, \
153 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } 155 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
@@ -829,6 +831,10 @@ typedef struct xfs_log_item {
829 /* buffer item iodone */ 831 /* buffer item iodone */
830 /* callback func */ 832 /* callback func */
831 struct xfs_item_ops *li_ops; /* function list */ 833 struct xfs_item_ops *li_ops; /* function list */
834
835 /* delayed logging */
836 struct list_head li_cil; /* CIL pointers */
837 struct xfs_log_vec *li_lv; /* active log vector */
832} xfs_log_item_t; 838} xfs_log_item_t;
833 839
834#define XFS_LI_IN_AIL 0x1 840#define XFS_LI_IN_AIL 0x1
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 2937a1e53318..f11d37d06dcc 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -299,6 +299,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
299void 299void
300xfs_trans_free_items( 300xfs_trans_free_items(
301 xfs_trans_t *tp, 301 xfs_trans_t *tp,
302 xfs_lsn_t commit_lsn,
302 int flags) 303 int flags)
303{ 304{
304 xfs_log_item_chunk_t *licp; 305 xfs_log_item_chunk_t *licp;
@@ -311,7 +312,7 @@ xfs_trans_free_items(
311 * Special case the embedded chunk so we don't free it below. 312 * Special case the embedded chunk so we don't free it below.
312 */ 313 */
313 if (!xfs_lic_are_all_free(licp)) { 314 if (!xfs_lic_are_all_free(licp)) {
314 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 315 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
315 xfs_lic_all_free(licp); 316 xfs_lic_all_free(licp);
316 licp->lic_unused = 0; 317 licp->lic_unused = 0;
317 } 318 }
@@ -322,7 +323,7 @@ xfs_trans_free_items(
322 */ 323 */
323 while (licp != NULL) { 324 while (licp != NULL) {
324 ASSERT(!xfs_lic_are_all_free(licp)); 325 ASSERT(!xfs_lic_are_all_free(licp));
325 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 326 (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
326 next_licp = licp->lic_next; 327 next_licp = licp->lic_next;
327 kmem_free(licp); 328 kmem_free(licp);
328 licp = next_licp; 329 licp = next_licp;
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 901dc0f032da..c6e4f2c8de6e 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -35,9 +35,14 @@ struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *,
35struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); 35struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *);
36struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, 36struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *,
37 struct xfs_log_item_desc *); 37 struct xfs_log_item_desc *);
38void xfs_trans_free_items(struct xfs_trans *, int); 38
39void xfs_trans_unlock_items(struct xfs_trans *, 39void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn);
40 xfs_lsn_t); 40void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
41 int flags);
42
43void xfs_trans_item_committed(struct xfs_log_item *lip,
44 xfs_lsn_t commit_lsn, int aborted);
45void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
41 46
42/* 47/*
43 * AIL traversal cursor. 48 * AIL traversal cursor.