author		Darrick J. Wong <darrick.wong@oracle.com>	2016-08-02 22:04:45 -0400
committer	Dave Chinner <david@fromorbit.com>		2016-08-02 22:04:45 -0400
commit		5880f2d78ff17c6ee7c7f6d4071bfd13090c264c (patch)
tree		6f90df23bcb16a03e1e1bb90bccf2336467c0cba
parent		abf09233817b5ea1241db0c187136d3b4738d218 (diff)
xfs: create rmap update intent log items
Create rmap update intent/done log items to record redo information in
the log.  Because we need to roll transactions between updating the
bmbt mapping and updating the reverse mapping, we also have to track
the status of the metadata updates that will be recorded in the
post-roll transactions, just in case we crash before committing the
final transaction.  This mechanism enables log recovery to finish what
was already started.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
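[Editor's note, not part of the commit] A minimal sketch of how a caller is expected to use these items. Only xfs_rui_init(), xfs_rud_init() and xfs_rui_release() below come from this patch; the transaction glue that logs the items and rolls between them arrives in later patches and is only indicated in comments here:

	/* Sketch only: log the intent first, then do the work, then log the done item. */
	struct xfs_rui_log_item	*ruip;
	struct xfs_rud_log_item	*rudp;

	ruip = xfs_rui_init(mp, nextents);		/* intent: what we plan to change */
	/* ...fill ruip->rui_format.rui_extents[] and log the RUI... */
	/* ...commit/roll the transaction so the intent reaches the log... */

	rudp = xfs_rud_init(mp, ruip, nextents);	/* done item, paired via rui_id */
	/* ...perform the rmapbt updates and log the RUD in the rolled transaction... */
	/* on abort, xfs_rui_release(ruip) drops the intent's reference instead */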
-rw-r--r--	fs/xfs/Makefile			  1
-rw-r--r--	fs/xfs/libxfs/xfs_log_format.h	 64
-rw-r--r--	fs/xfs/libxfs/xfs_rmap.h	 19
-rw-r--r--	fs/xfs/xfs_rmap_item.c		459
-rw-r--r--	fs/xfs/xfs_rmap_item.h		100
-rw-r--r--	fs/xfs/xfs_super.c		 21
6 files changed, 662 insertions, 2 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index b76e937f0729..6c9039384d34 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -103,6 +103,7 @@ xfs-y += xfs_log.o \
 	xfs_extfree_item.o \
 	xfs_icreate_item.o \
 	xfs_inode_item.o \
+	xfs_rmap_item.o \
 	xfs_log_recover.o \
 	xfs_trans_ail.o \
 	xfs_trans_buf.o \
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e8f49c029ff0..a8d794d1ae9a 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -110,7 +110,9 @@ static inline uint xlog_get_cycle(char *ptr)
 #define XLOG_REG_TYPE_COMMIT		18
 #define XLOG_REG_TYPE_TRANSHDR		19
 #define XLOG_REG_TYPE_ICREATE		20
-#define XLOG_REG_TYPE_MAX		20
+#define XLOG_REG_TYPE_RUI_FORMAT	21
+#define XLOG_REG_TYPE_RUD_FORMAT	22
+#define XLOG_REG_TYPE_MAX		22
 
 /*
  * Flags to log operation header
@@ -227,6 +229,8 @@ typedef struct xfs_trans_header {
 #define XFS_LI_DQUOT		0x123d
 #define XFS_LI_QUOTAOFF		0x123e
 #define XFS_LI_ICREATE		0x123f
+#define XFS_LI_RUI		0x1240	/* rmap update intent */
+#define XFS_LI_RUD		0x1241
 
 #define XFS_LI_TYPE_DESC \
 	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \
@@ -236,7 +240,9 @@ typedef struct xfs_trans_header {
 	{ XFS_LI_BUF,		"XFS_LI_BUF" }, \
 	{ XFS_LI_DQUOT,		"XFS_LI_DQUOT" }, \
 	{ XFS_LI_QUOTAOFF,	"XFS_LI_QUOTAOFF" }, \
-	{ XFS_LI_ICREATE,	"XFS_LI_ICREATE" }
+	{ XFS_LI_ICREATE,	"XFS_LI_ICREATE" }, \
+	{ XFS_LI_RUI,		"XFS_LI_RUI" }, \
+	{ XFS_LI_RUD,		"XFS_LI_RUD" }
 
 /*
  * Inode Log Item Format definitions.
@@ -604,6 +610,60 @@ typedef struct xfs_efd_log_format_64 {
 } xfs_efd_log_format_64_t;
 
 /*
+ * RUI/RUD (reverse mapping) log format definitions
+ */
+struct xfs_map_extent {
+	__uint64_t		me_owner;
+	__uint64_t		me_startblock;
+	__uint64_t		me_startoff;
+	__uint32_t		me_len;
+	__uint32_t		me_flags;
+};
+
+/* rmap me_flags: upper bits are flags, lower byte is type code */
+#define XFS_RMAP_EXTENT_MAP		1
+#define XFS_RMAP_EXTENT_UNMAP		3
+#define XFS_RMAP_EXTENT_CONVERT		5
+#define XFS_RMAP_EXTENT_ALLOC		7
+#define XFS_RMAP_EXTENT_FREE		8
+#define XFS_RMAP_EXTENT_TYPE_MASK	0xFF
+
+#define XFS_RMAP_EXTENT_ATTR_FORK	(1U << 31)
+#define XFS_RMAP_EXTENT_BMBT_BLOCK	(1U << 30)
+#define XFS_RMAP_EXTENT_UNWRITTEN	(1U << 29)
+
+#define XFS_RMAP_EXTENT_FLAGS		(XFS_RMAP_EXTENT_TYPE_MASK | \
+					 XFS_RMAP_EXTENT_ATTR_FORK | \
+					 XFS_RMAP_EXTENT_BMBT_BLOCK | \
+					 XFS_RMAP_EXTENT_UNWRITTEN)
+
+/*
+ * This is the structure used to lay out an rui log item in the
+ * log.  The rui_extents field is a variable size array whose
+ * size is given by rui_nextents.
+ */
+struct xfs_rui_log_format {
+	__uint16_t		rui_type;	/* rui log item type */
+	__uint16_t		rui_size;	/* size of this item */
+	__uint32_t		rui_nextents;	/* # extents to rmap */
+	__uint64_t		rui_id;		/* rui identifier */
+	struct xfs_map_extent	rui_extents[1];	/* array of extents to rmap */
+};
+
+/*
+ * This is the structure used to lay out an rud log item in the
+ * log.  The rud_extents array is a variable size array whose
+ * size is given by rud_nextents.
+ */
+struct xfs_rud_log_format {
+	__uint16_t		rud_type;	/* rud log item type */
+	__uint16_t		rud_size;	/* size of this item */
+	__uint32_t		rud_nextents;	/* # of extents rmapped */
+	__uint64_t		rud_rui_id;	/* id of corresponding rui */
+	struct xfs_map_extent	rud_extents[1];	/* array of extents rmapped */
+};
+
+/*
  * Dquot Log format definitions.
  *
  * The first two fields must be the type and size fitting into
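[Editor's note, not part of the commit] As a quick illustration of the me_flags encoding above: the low byte holds one of the XFS_RMAP_EXTENT_* type codes and the high bits carry per-extent state, so the field can be split with the masks defined in this hunk. The helper names below are hypothetical, not from the patch:

	/* Sketch: split me_flags into the type code and the state bits. */
	static inline int xfs_rmap_me_type(uint32_t me_flags)
	{
		return me_flags & XFS_RMAP_EXTENT_TYPE_MASK;	/* e.g. XFS_RMAP_EXTENT_MAP */
	}

	static inline bool xfs_rmap_me_attr_fork(uint32_t me_flags)
	{
		return (me_flags & XFS_RMAP_EXTENT_ATTR_FORK) != 0;
	}

	/* Any bit set outside XFS_RMAP_EXTENT_FLAGS would indicate a corrupt item. */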
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 92ac067da7dd..c4b1133e2ff9 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -163,4 +163,23 @@ int xfs_rmap_query_range(struct xfs_btree_cur *cur,
 		struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
 		xfs_rmap_query_range_fn fn, void *priv);
 
+enum xfs_rmap_intent_type {
+	XFS_RMAP_MAP,
+	XFS_RMAP_MAP_SHARED,
+	XFS_RMAP_UNMAP,
+	XFS_RMAP_UNMAP_SHARED,
+	XFS_RMAP_CONVERT,
+	XFS_RMAP_CONVERT_SHARED,
+	XFS_RMAP_ALLOC,
+	XFS_RMAP_FREE,
+};
+
+struct xfs_rmap_intent {
+	struct list_head			ri_list;
+	enum xfs_rmap_intent_type		ri_type;
+	__uint64_t				ri_owner;
+	int					ri_whichfork;
+	struct xfs_bmbt_irec			ri_bmap;
+};
+
 #endif /* __XFS_RMAP_H__ */
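[Editor's note, not part of the commit] The new xfs_rmap_intent is the in-core record that later patches queue up while a transaction rolls. A rough sketch of filling one in, where ip, imap and pending_list are assumed to exist in the caller:

	/* Sketch: queue an in-core "map this extent" intent for deferred processing. */
	struct xfs_rmap_intent	*ri;

	ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP);
	ri->ri_type = XFS_RMAP_MAP;
	ri->ri_owner = ip->i_ino;		/* owner of the new mapping */
	ri->ri_whichfork = XFS_DATA_FORK;
	ri->ri_bmap = *imap;			/* the bmbt record being added */
	list_add_tail(&ri->ri_list, &pending_list);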
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
new file mode 100644
index 000000000000..5398b8478f02
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.c
@@ -0,0 +1,459 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_trans.h"
27#include "xfs_trans_priv.h"
28#include "xfs_buf_item.h"
29#include "xfs_rmap_item.h"
30#include "xfs_log.h"
31
32
33kmem_zone_t *xfs_rui_zone;
34kmem_zone_t *xfs_rud_zone;
35
36static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
37{
38 return container_of(lip, struct xfs_rui_log_item, rui_item);
39}
40
41void
42xfs_rui_item_free(
43 struct xfs_rui_log_item *ruip)
44{
45 if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
46 kmem_free(ruip);
47 else
48 kmem_zone_free(xfs_rui_zone, ruip);
49}
50
51/*
52 * This returns the number of iovecs needed to log the given rui item.
53 * We only need 1 iovec for an rui item. It just logs the rui_log_format
54 * structure.
55 */
56static inline int
57xfs_rui_item_sizeof(
58 struct xfs_rui_log_item *ruip)
59{
60 return sizeof(struct xfs_rui_log_format) +
61 (ruip->rui_format.rui_nextents - 1) *
62 sizeof(struct xfs_map_extent);
63}
64
65STATIC void
66xfs_rui_item_size(
67 struct xfs_log_item *lip,
68 int *nvecs,
69 int *nbytes)
70{
71 *nvecs += 1;
72 *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip));
73}
74
75/*
76 * This is called to fill in the vector of log iovecs for the
77 * given rui log item. We use only 1 iovec, and we point that
78 * at the rui_log_format structure embedded in the rui item.
79 * It is at this point that we assert that all of the extent
80 * slots in the rui item have been filled.
81 */
82STATIC void
83xfs_rui_item_format(
84 struct xfs_log_item *lip,
85 struct xfs_log_vec *lv)
86{
87 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
88 struct xfs_log_iovec *vecp = NULL;
89
90 ASSERT(atomic_read(&ruip->rui_next_extent) ==
91 ruip->rui_format.rui_nextents);
92
93 ruip->rui_format.rui_type = XFS_LI_RUI;
94 ruip->rui_format.rui_size = 1;
95
96 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
97 xfs_rui_item_sizeof(ruip));
98}
99
100/*
101 * Pinning has no meaning for an rui item, so just return.
102 */
103STATIC void
104xfs_rui_item_pin(
105 struct xfs_log_item *lip)
106{
107}
108
109/*
110 * The unpin operation is the last place an RUI is manipulated in the log. It is
111 * either inserted in the AIL or aborted in the event of a log I/O error. In
112 * either case, the RUI transaction has been successfully committed to make it
113 * this far. Therefore, we expect whoever committed the RUI to either construct
114 * and commit the RUD or drop the RUD's reference in the event of error. Simply
115 * drop the log's RUI reference now that the log is done with it.
116 */
117STATIC void
118xfs_rui_item_unpin(
119 struct xfs_log_item *lip,
120 int remove)
121{
122 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
123
124 xfs_rui_release(ruip);
125}
126
127/*
128 * RUI items have no locking or pushing. However, since RUIs are pulled from
129 * the AIL when their corresponding RUDs are committed to disk, their situation
130 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
131 * will eventually flush the log. This should help in getting the RUI out of
132 * the AIL.
133 */
134STATIC uint
135xfs_rui_item_push(
136 struct xfs_log_item *lip,
137 struct list_head *buffer_list)
138{
139 return XFS_ITEM_PINNED;
140}
141
142/*
143 * The RUI has been either committed or aborted if the transaction has been
144 * cancelled. If the transaction was cancelled, an RUD isn't going to be
145 * constructed and thus we free the RUI here directly.
146 */
147STATIC void
148xfs_rui_item_unlock(
149 struct xfs_log_item *lip)
150{
151 if (lip->li_flags & XFS_LI_ABORTED)
152 xfs_rui_item_free(RUI_ITEM(lip));
153}
154
155/*
156 * The RUI is logged only once and cannot be moved in the log, so simply return
157 * the lsn at which it's been logged.
158 */
159STATIC xfs_lsn_t
160xfs_rui_item_committed(
161 struct xfs_log_item *lip,
162 xfs_lsn_t lsn)
163{
164 return lsn;
165}
166
167/*
168 * The RUI dependency tracking op doesn't do squat. It can't because
169 * it doesn't know where the rmap update is coming from.  The dependency
170 * tracking has to be handled by the "enclosing" metadata object.  For
171 * example, for inodes, the inode is locked throughout the rmap updates,
172 * so the dependency should be recorded there.
173 */
174STATIC void
175xfs_rui_item_committing(
176 struct xfs_log_item *lip,
177 xfs_lsn_t lsn)
178{
179}
180
181/*
182 * This is the ops vector shared by all rui log items.
183 */
184static const struct xfs_item_ops xfs_rui_item_ops = {
185 .iop_size = xfs_rui_item_size,
186 .iop_format = xfs_rui_item_format,
187 .iop_pin = xfs_rui_item_pin,
188 .iop_unpin = xfs_rui_item_unpin,
189 .iop_unlock = xfs_rui_item_unlock,
190 .iop_committed = xfs_rui_item_committed,
191 .iop_push = xfs_rui_item_push,
192 .iop_committing = xfs_rui_item_committing,
193};
194
195/*
196 * Allocate and initialize an rui item with the given number of extents.
197 */
198struct xfs_rui_log_item *
199xfs_rui_init(
200 struct xfs_mount *mp,
201 uint nextents)
202
203{
204 struct xfs_rui_log_item *ruip;
205 uint size;
206
207 ASSERT(nextents > 0);
208 if (nextents > XFS_RUI_MAX_FAST_EXTENTS) {
209 size = (uint)(sizeof(struct xfs_rui_log_item) +
210 ((nextents - 1) * sizeof(struct xfs_map_extent)));
211 ruip = kmem_zalloc(size, KM_SLEEP);
212 } else {
213 ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
214 }
215
216 xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
217 ruip->rui_format.rui_nextents = nextents;
218 ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
219 atomic_set(&ruip->rui_next_extent, 0);
220 atomic_set(&ruip->rui_refcount, 2);
221
222 return ruip;
223}
224
225/*
226 * Copy an RUI format buffer from the given buf, and into the destination
227 * RUI format structure. The RUI/RUD items were designed not to need any
228 * special alignment handling.
229 */
230int
231xfs_rui_copy_format(
232 struct xfs_log_iovec *buf,
233 struct xfs_rui_log_format *dst_rui_fmt)
234{
235 struct xfs_rui_log_format *src_rui_fmt;
236 uint len;
237
238 src_rui_fmt = buf->i_addr;
239 len = sizeof(struct xfs_rui_log_format) +
240 (src_rui_fmt->rui_nextents - 1) *
241 sizeof(struct xfs_map_extent);
242
243 if (buf->i_len != len)
244 return -EFSCORRUPTED;
245
246 memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len);
247 return 0;
248}
249
250/*
251 * Freeing the RUI requires that we remove it from the AIL if it has already
252 * been placed there. However, the RUI may not yet have been placed in the AIL
253 * when called by xfs_rui_release() from RUD processing due to the ordering of
254 * committed vs unpin operations in bulk insert operations. Hence the reference
255 * count to ensure only the last caller frees the RUI.
256 */
257void
258xfs_rui_release(
259 struct xfs_rui_log_item *ruip)
260{
261 if (atomic_dec_and_test(&ruip->rui_refcount)) {
262 xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
263 xfs_rui_item_free(ruip);
264 }
265}
266
267static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
268{
269 return container_of(lip, struct xfs_rud_log_item, rud_item);
270}
271
272STATIC void
273xfs_rud_item_free(struct xfs_rud_log_item *rudp)
274{
275 if (rudp->rud_format.rud_nextents > XFS_RUD_MAX_FAST_EXTENTS)
276 kmem_free(rudp);
277 else
278 kmem_zone_free(xfs_rud_zone, rudp);
279}
280
281/*
282 * This returns the number of iovecs needed to log the given rud item.
283 * We only need 1 iovec for an rud item. It just logs the rud_log_format
284 * structure.
285 */
286static inline int
287xfs_rud_item_sizeof(
288 struct xfs_rud_log_item *rudp)
289{
290 return sizeof(struct xfs_rud_log_format) +
291 (rudp->rud_format.rud_nextents - 1) *
292 sizeof(struct xfs_map_extent);
293}
294
295STATIC void
296xfs_rud_item_size(
297 struct xfs_log_item *lip,
298 int *nvecs,
299 int *nbytes)
300{
301 *nvecs += 1;
302 *nbytes += xfs_rud_item_sizeof(RUD_ITEM(lip));
303}
304
305/*
306 * This is called to fill in the vector of log iovecs for the
307 * given rud log item. We use only 1 iovec, and we point that
308 * at the rud_log_format structure embedded in the rud item.
309 * It is at this point that we assert that all of the extent
310 * slots in the rud item have been filled.
311 */
312STATIC void
313xfs_rud_item_format(
314 struct xfs_log_item *lip,
315 struct xfs_log_vec *lv)
316{
317 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
318 struct xfs_log_iovec *vecp = NULL;
319
320 ASSERT(rudp->rud_next_extent == rudp->rud_format.rud_nextents);
321
322 rudp->rud_format.rud_type = XFS_LI_RUD;
323 rudp->rud_format.rud_size = 1;
324
325 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
326 xfs_rud_item_sizeof(rudp));
327}
328
329/*
330 * Pinning has no meaning for an rud item, so just return.
331 */
332STATIC void
333xfs_rud_item_pin(
334 struct xfs_log_item *lip)
335{
336}
337
338/*
339 * Since pinning has no meaning for an rud item, unpinning does
340 * not either.
341 */
342STATIC void
343xfs_rud_item_unpin(
344 struct xfs_log_item *lip,
345 int remove)
346{
347}
348
349/*
350 * There isn't much you can do to push on an rud item. It is simply stuck
351 * waiting for the log to be flushed to disk.
352 */
353STATIC uint
354xfs_rud_item_push(
355 struct xfs_log_item *lip,
356 struct list_head *buffer_list)
357{
358 return XFS_ITEM_PINNED;
359}
360
361/*
362 * The RUD is either committed or aborted if the transaction is cancelled. If
363 * the transaction is cancelled, drop our reference to the RUI and free the
364 * RUD.
365 */
366STATIC void
367xfs_rud_item_unlock(
368 struct xfs_log_item *lip)
369{
370 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
371
372 if (lip->li_flags & XFS_LI_ABORTED) {
373 xfs_rui_release(rudp->rud_ruip);
374 xfs_rud_item_free(rudp);
375 }
376}
377
378/*
379 * When the rud item is committed to disk, all we need to do is delete our
380 * reference to our partner rui item and then free ourselves. Since we're
381 * freeing ourselves we must return -1 to keep the transaction code from
382 * further referencing this item.
383 */
384STATIC xfs_lsn_t
385xfs_rud_item_committed(
386 struct xfs_log_item *lip,
387 xfs_lsn_t lsn)
388{
389 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
390
391 /*
392 * Drop the RUI reference regardless of whether the RUD has been
393 * aborted. Once the RUD transaction is constructed, it is the sole
394 * responsibility of the RUD to release the RUI (even if the RUI is
395 * aborted due to log I/O error).
396 */
397 xfs_rui_release(rudp->rud_ruip);
398 xfs_rud_item_free(rudp);
399
400 return (xfs_lsn_t)-1;
401}
402
403/*
404 * The RUD dependency tracking op doesn't do squat. It can't because
405 * it doesn't know where the rmap update is coming from.  The dependency
406 * tracking has to be handled by the "enclosing" metadata object.  For
407 * example, for inodes, the inode is locked throughout the rmap updates,
408 * so the dependency should be recorded there.
409 */
410STATIC void
411xfs_rud_item_committing(
412 struct xfs_log_item *lip,
413 xfs_lsn_t lsn)
414{
415}
416
417/*
418 * This is the ops vector shared by all rud log items.
419 */
420static const struct xfs_item_ops xfs_rud_item_ops = {
421 .iop_size = xfs_rud_item_size,
422 .iop_format = xfs_rud_item_format,
423 .iop_pin = xfs_rud_item_pin,
424 .iop_unpin = xfs_rud_item_unpin,
425 .iop_unlock = xfs_rud_item_unlock,
426 .iop_committed = xfs_rud_item_committed,
427 .iop_push = xfs_rud_item_push,
428 .iop_committing = xfs_rud_item_committing,
429};
430
431/*
432 * Allocate and initialize an rud item with the given number of extents.
433 */
434struct xfs_rud_log_item *
435xfs_rud_init(
436 struct xfs_mount *mp,
437 struct xfs_rui_log_item *ruip,
438 uint nextents)
439
440{
441 struct xfs_rud_log_item *rudp;
442 uint size;
443
444 ASSERT(nextents > 0);
445 if (nextents > XFS_RUD_MAX_FAST_EXTENTS) {
446 size = (uint)(sizeof(struct xfs_rud_log_item) +
447 ((nextents - 1) * sizeof(struct xfs_map_extent)));
448 rudp = kmem_zalloc(size, KM_SLEEP);
449 } else {
450 rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
451 }
452
453 xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
454 rudp->rud_ruip = ruip;
455 rudp->rud_format.rud_nextents = nextents;
456 rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
457
458 return rudp;
459}
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
new file mode 100644
index 000000000000..bd36ab50c0fe
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.h
@@ -0,0 +1,100 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_RMAP_ITEM_H__
21#define __XFS_RMAP_ITEM_H__
22
23/*
24 * There are (currently) three pairs of rmap btree redo item types: map, unmap,
25 * and convert. The common abbreviations for these are RUI (rmap update
26 * intent) and RUD (rmap update done). The redo item type is encoded in the
27 * flags field of each xfs_map_extent.
28 *
29 * *I items should be recorded in the *first* of a series of rolled
30 * transactions, and the *D items should be recorded in the same transaction
31 * that records the associated rmapbt updates. Typically, the first
32 * transaction will record a bmbt update, followed by some number of
33 * transactions containing rmapbt updates, and finally transactions with any
34 * bnobt/cntbt updates.
35 *
36 * Should the system crash after the commit of the first transaction but
37 * before the commit of the final transaction in a series, log recovery will
38 * use the redo information recorded by the intent items to replay the
39 * (rmapbt/bnobt/cntbt) metadata updates in the non-first transaction.
40 */
41
42/* kernel only RUI/RUD definitions */
43
44struct xfs_mount;
45struct kmem_zone;
46
47/*
48 * Max number of extents in fast allocation path.
49 */
50#define XFS_RUI_MAX_FAST_EXTENTS 16
51
52/*
53 * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
54 */
55#define XFS_RUI_RECOVERED 1
56
57/*
58 * This is the "rmap update intent" log item. It is used to log the fact that
59 * some reverse mappings need to change. It is used in conjunction with the
60 * "rmap update done" log item described below.
61 *
62 * These log items follow the same rules as struct xfs_efi_log_item; see the
63 * comments about that structure (in xfs_extfree_item.h) for more details.
64 */
65struct xfs_rui_log_item {
66 struct xfs_log_item rui_item;
67 atomic_t rui_refcount;
68 atomic_t rui_next_extent;
69 unsigned long rui_flags; /* misc flags */
70 struct xfs_rui_log_format rui_format;
71};
72
73/*
74 * This is the "rmap update done" log item. It is used to log the fact that
75 * some rmapbt updates mentioned in an earlier rui item have been performed.
76 */
77struct xfs_rud_log_item {
78 struct xfs_log_item rud_item;
79 struct xfs_rui_log_item *rud_ruip;
80 uint rud_next_extent;
81 struct xfs_rud_log_format rud_format;
82};
83
84/*
85 * Max number of extents in fast allocation path.
86 */
87#define XFS_RUD_MAX_FAST_EXTENTS 16
88
89extern struct kmem_zone *xfs_rui_zone;
90extern struct kmem_zone *xfs_rud_zone;
91
92struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
93struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
94 struct xfs_rui_log_item *, uint);
95int xfs_rui_copy_format(struct xfs_log_iovec *buf,
96 struct xfs_rui_log_format *dst_rui_fmt);
97void xfs_rui_item_free(struct xfs_rui_log_item *);
98void xfs_rui_release(struct xfs_rui_log_item *);
99
100#endif /* __XFS_RMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 449cadf149f9..654a0924b3d3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -46,6 +46,7 @@
 #include "xfs_quota.h"
 #include "xfs_sysfs.h"
 #include "xfs_ondisk.h"
+#include "xfs_rmap_item.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -1765,8 +1766,26 @@ xfs_init_zones(void)
 	if (!xfs_icreate_zone)
 		goto out_destroy_ili_zone;
 
+	xfs_rud_zone = kmem_zone_init((sizeof(struct xfs_rud_log_item) +
+			((XFS_RUD_MAX_FAST_EXTENTS - 1) *
+			sizeof(struct xfs_map_extent))),
+			"xfs_rud_item");
+	if (!xfs_rud_zone)
+		goto out_destroy_icreate_zone;
+
+	xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) +
+			((XFS_RUI_MAX_FAST_EXTENTS - 1) *
+			sizeof(struct xfs_map_extent))),
+			"xfs_rui_item");
+	if (!xfs_rui_zone)
+		goto out_destroy_rud_zone;
+
 	return 0;
 
+ out_destroy_rud_zone:
+	kmem_zone_destroy(xfs_rud_zone);
+ out_destroy_icreate_zone:
+	kmem_zone_destroy(xfs_icreate_zone);
  out_destroy_ili_zone:
 	kmem_zone_destroy(xfs_ili_zone);
  out_destroy_inode_zone:
@@ -1805,6 +1824,8 @@ xfs_destroy_zones(void)
 	 * destroy caches.
 	 */
 	rcu_barrier();
+	kmem_zone_destroy(xfs_rui_zone);
+	kmem_zone_destroy(xfs_rud_zone);
 	kmem_zone_destroy(xfs_icreate_zone);
 	kmem_zone_destroy(xfs_ili_zone);
 	kmem_zone_destroy(xfs_inode_zone);