aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/Makefile6
-rw-r--r--fs/xfs/xfs_ag.h56
-rw-r--r--fs/xfs/xfs_alloc.c201
-rw-r--r--fs/xfs/xfs_alloc_btree.c105
-rw-r--r--fs/xfs/xfs_alloc_btree.h12
-rw-r--r--fs/xfs/xfs_aops.c12
-rw-r--r--fs/xfs/xfs_attr.c454
-rw-r--r--fs/xfs/xfs_attr.h1
-rw-r--r--fs/xfs/xfs_attr_leaf.c1781
-rw-r--r--fs/xfs/xfs_attr_leaf.h122
-rw-r--r--fs/xfs/xfs_attr_remote.c541
-rw-r--r--fs/xfs/xfs_attr_remote.h46
-rw-r--r--fs/xfs/xfs_bmap.c4314
-rw-r--r--fs/xfs/xfs_bmap_btree.c110
-rw-r--r--fs/xfs/xfs_bmap_btree.h19
-rw-r--r--fs/xfs/xfs_btree.c256
-rw-r--r--fs/xfs/xfs_btree.h64
-rw-r--r--fs/xfs/xfs_buf.c4
-rw-r--r--fs/xfs/xfs_buf_item.h64
-rw-r--r--fs/xfs/xfs_da_btree.c1501
-rw-r--r--fs/xfs/xfs_da_btree.h130
-rw-r--r--fs/xfs/xfs_dinode.h43
-rw-r--r--fs/xfs/xfs_dir2_block.c179
-rw-r--r--fs/xfs/xfs_dir2_data.c266
-rw-r--r--fs/xfs/xfs_dir2_format.h278
-rw-r--r--fs/xfs/xfs_dir2_leaf.c898
-rw-r--r--fs/xfs/xfs_dir2_node.c1007
-rw-r--r--fs/xfs/xfs_dir2_priv.h50
-rw-r--r--fs/xfs/xfs_dir2_sf.c12
-rw-r--r--fs/xfs/xfs_dquot.c160
-rw-r--r--fs/xfs/xfs_dquot.h16
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c27
-rw-r--r--fs/xfs/xfs_extfree_item.h14
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_fsops.c34
-rw-r--r--fs/xfs/xfs_ialloc.c109
-rw-r--r--fs/xfs/xfs_ialloc_btree.c87
-rw-r--r--fs/xfs/xfs_ialloc_btree.h9
-rw-r--r--fs/xfs/xfs_inode.c212
-rw-r--r--fs/xfs/xfs_inode.h31
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_iomap.c163
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_log_recover.c246
-rw-r--r--fs/xfs/xfs_message.h26
-rw-r--r--fs/xfs/xfs_mount.c146
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_qm.c25
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c9
-rw-r--r--fs/xfs/xfs_quota.h11
-rw-r--r--fs/xfs/xfs_sb.h166
-rw-r--r--fs/xfs/xfs_symlink.c730
-rw-r--r--fs/xfs/xfs_symlink.h66
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trace.h24
-rw-r--r--fs/xfs/xfs_trans_buf.c63
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_vnodeops.c478
63 files changed, 9572 insertions, 5846 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d02201df855b..6313b69b6644 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -45,11 +45,11 @@ xfs-y += xfs_aops.o \
45 xfs_itable.o \ 45 xfs_itable.o \
46 xfs_message.o \ 46 xfs_message.o \
47 xfs_mru_cache.o \ 47 xfs_mru_cache.o \
48 xfs_super.o \
49 xfs_xattr.o \
50 xfs_rename.o \ 48 xfs_rename.o \
49 xfs_super.o \
51 xfs_utils.o \ 50 xfs_utils.o \
52 xfs_vnodeops.o \ 51 xfs_vnodeops.o \
52 xfs_xattr.o \
53 kmem.o \ 53 kmem.o \
54 uuid.o 54 uuid.o
55 55
@@ -58,6 +58,7 @@ xfs-y += xfs_alloc.o \
58 xfs_alloc_btree.o \ 58 xfs_alloc_btree.o \
59 xfs_attr.o \ 59 xfs_attr.o \
60 xfs_attr_leaf.o \ 60 xfs_attr_leaf.o \
61 xfs_attr_remote.o \
61 xfs_bmap.o \ 62 xfs_bmap.o \
62 xfs_bmap_btree.o \ 63 xfs_bmap_btree.o \
63 xfs_btree.o \ 64 xfs_btree.o \
@@ -73,6 +74,7 @@ xfs-y += xfs_alloc.o \
73 xfs_inode.o \ 74 xfs_inode.o \
74 xfs_log_recover.o \ 75 xfs_log_recover.o \
75 xfs_mount.o \ 76 xfs_mount.o \
77 xfs_symlink.o \
76 xfs_trans.o 78 xfs_trans.o
77 79
78# low-level transaction/log code 80# low-level transaction/log code
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index f2aeedb6a579..317aa86d96ea 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -30,6 +30,7 @@ struct xfs_trans;
30 30
31#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ 31#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */
32#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ 32#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */
33#define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */
33#define XFS_AGF_VERSION 1 34#define XFS_AGF_VERSION 1
34#define XFS_AGI_VERSION 1 35#define XFS_AGI_VERSION 1
35 36
@@ -63,12 +64,29 @@ typedef struct xfs_agf {
63 __be32 agf_spare0; /* spare field */ 64 __be32 agf_spare0; /* spare field */
64 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ 65 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
65 __be32 agf_spare1; /* spare field */ 66 __be32 agf_spare1; /* spare field */
67
66 __be32 agf_flfirst; /* first freelist block's index */ 68 __be32 agf_flfirst; /* first freelist block's index */
67 __be32 agf_fllast; /* last freelist block's index */ 69 __be32 agf_fllast; /* last freelist block's index */
68 __be32 agf_flcount; /* count of blocks in freelist */ 70 __be32 agf_flcount; /* count of blocks in freelist */
69 __be32 agf_freeblks; /* total free blocks */ 71 __be32 agf_freeblks; /* total free blocks */
72
70 __be32 agf_longest; /* longest free space */ 73 __be32 agf_longest; /* longest free space */
71 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ 74 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
75 uuid_t agf_uuid; /* uuid of filesystem */
76
77 /*
78 * reserve some contiguous space for future logged fields before we add
79 * the unlogged fields. This makes the range logging via flags and
80 * structure offsets much simpler.
81 */
82 __be64 agf_spare64[16];
83
84 /* unlogged fields, written during buffer writeback. */
85 __be64 agf_lsn; /* last write sequence */
86 __be32 agf_crc; /* crc of agf sector */
87 __be32 agf_spare2;
88
89 /* structure must be padded to 64 bit alignment */
72} xfs_agf_t; 90} xfs_agf_t;
73 91
74#define XFS_AGF_MAGICNUM 0x00000001 92#define XFS_AGF_MAGICNUM 0x00000001
@@ -83,7 +101,8 @@ typedef struct xfs_agf {
83#define XFS_AGF_FREEBLKS 0x00000200 101#define XFS_AGF_FREEBLKS 0x00000200
84#define XFS_AGF_LONGEST 0x00000400 102#define XFS_AGF_LONGEST 0x00000400
85#define XFS_AGF_BTREEBLKS 0x00000800 103#define XFS_AGF_BTREEBLKS 0x00000800
86#define XFS_AGF_NUM_BITS 12 104#define XFS_AGF_UUID 0x00001000
105#define XFS_AGF_NUM_BITS 13
87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 106#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
88 107
89#define XFS_AGF_FLAGS \ 108#define XFS_AGF_FLAGS \
@@ -98,7 +117,8 @@ typedef struct xfs_agf {
98 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \ 117 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \
99 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ 118 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
100 { XFS_AGF_LONGEST, "LONGEST" }, \ 119 { XFS_AGF_LONGEST, "LONGEST" }, \
101 { XFS_AGF_BTREEBLKS, "BTREEBLKS" } 120 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \
121 { XFS_AGF_UUID, "UUID" }
102 122
103/* disk block (xfs_daddr_t) in the AG */ 123/* disk block (xfs_daddr_t) in the AG */
104#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) 124#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
@@ -132,6 +152,7 @@ typedef struct xfs_agi {
132 __be32 agi_root; /* root of inode btree */ 152 __be32 agi_root; /* root of inode btree */
133 __be32 agi_level; /* levels in inode btree */ 153 __be32 agi_level; /* levels in inode btree */
134 __be32 agi_freecount; /* number of free inodes */ 154 __be32 agi_freecount; /* number of free inodes */
155
135 __be32 agi_newino; /* new inode just allocated */ 156 __be32 agi_newino; /* new inode just allocated */
136 __be32 agi_dirino; /* last directory inode chunk */ 157 __be32 agi_dirino; /* last directory inode chunk */
137 /* 158 /*
@@ -139,6 +160,13 @@ typedef struct xfs_agi {
139 * still being referenced. 160 * still being referenced.
140 */ 161 */
141 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; 162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
163
164 uuid_t agi_uuid; /* uuid of filesystem */
165 __be32 agi_crc; /* crc of agi sector */
166 __be32 agi_pad32;
167 __be64 agi_lsn; /* last write sequence */
168
169 /* structure must be padded to 64 bit alignment */
142} xfs_agi_t; 170} xfs_agi_t;
143 171
144#define XFS_AGI_MAGICNUM 0x00000001 172#define XFS_AGI_MAGICNUM 0x00000001
@@ -171,11 +199,31 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
171 */ 199 */
172#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) 200#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
173#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) 201#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
174#define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
175#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) 202#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr))
176 203
204#define XFS_BUF_TO_AGFL_BNO(mp, bp) \
205 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
206 &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
207 (__be32 *)(bp)->b_addr)
208
209/*
210 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of
211 * slots in the beginning of the block for a proper header with the
212 * location information and CRC.
213 */
214#define XFS_AGFL_SIZE(mp) \
215 (((mp)->m_sb.sb_sectsize - \
216 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
217 sizeof(struct xfs_agfl) : 0)) / \
218 sizeof(xfs_agblock_t))
219
177typedef struct xfs_agfl { 220typedef struct xfs_agfl {
178 __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ 221 __be32 agfl_magicnum;
222 __be32 agfl_seqno;
223 uuid_t agfl_uuid;
224 __be64 agfl_lsn;
225 __be32 agfl_crc;
226 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
179} xfs_agfl_t; 227} xfs_agfl_t;
180 228
181/* 229/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 0ad23253e8b1..5673bcfda2f0 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -33,7 +33,9 @@
33#include "xfs_alloc.h" 33#include "xfs_alloc.h"
34#include "xfs_extent_busy.h" 34#include "xfs_extent_busy.h"
35#include "xfs_error.h" 35#include "xfs_error.h"
36#include "xfs_cksum.h"
36#include "xfs_trace.h" 37#include "xfs_trace.h"
38#include "xfs_buf_item.h"
37 39
38struct workqueue_struct *xfs_alloc_wq; 40struct workqueue_struct *xfs_alloc_wq;
39 41
@@ -430,53 +432,84 @@ xfs_alloc_fixup_trees(
430 return 0; 432 return 0;
431} 433}
432 434
433static void 435static bool
434xfs_agfl_verify( 436xfs_agfl_verify(
435 struct xfs_buf *bp) 437 struct xfs_buf *bp)
436{ 438{
437#ifdef WHEN_CRCS_COME_ALONG
438 /*
439 * we cannot actually do any verification of the AGFL because mkfs does
440 * not initialise the AGFL to zero or NULL. Hence the only valid part of
441 * the AGFL is what the AGF says is active. We can't get to the AGF, so
442 * we can't verify just those entries are valid.
443 *
444 * This problem goes away when the CRC format change comes along as that
445 * requires the AGFL to be initialised by mkfs. At that point, we can
446 * verify the blocks in the agfl -active or not- lie within the bounds
447 * of the AG. Until then, just leave this check ifdef'd out.
448 */
449 struct xfs_mount *mp = bp->b_target->bt_mount; 439 struct xfs_mount *mp = bp->b_target->bt_mount;
450 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 440 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
451 int agfl_ok = 1;
452
453 int i; 441 int i;
454 442
443 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
444 return false;
445 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
446 return false;
447 /*
448 * during growfs operations, the perag is not fully initialised,
449 * so we can't use it for any useful checking. growfs ensures we can't
450 * use it by using uncached buffers that don't have the perag attached
451 * so we can detect and avoid this problem.
452 */
453 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
454 return false;
455
455 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 456 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
456 if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK || 457 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
457 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 458 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
458 agfl_ok = 0; 459 return false;
459 } 460 }
461 return true;
462}
463
464static void
465xfs_agfl_read_verify(
466 struct xfs_buf *bp)
467{
468 struct xfs_mount *mp = bp->b_target->bt_mount;
469 int agfl_ok = 1;
470
471 /*
472 * There is no verification of non-crc AGFLs because mkfs does not
473 * initialise the AGFL to zero or NULL. Hence the only valid part of the
474 * AGFL is what the AGF says is active. We can't get to the AGF, so we
475 * can't verify just those entries are valid.
476 */
477 if (!xfs_sb_version_hascrc(&mp->m_sb))
478 return;
479
480 agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
481 offsetof(struct xfs_agfl, agfl_crc));
482
483 agfl_ok = agfl_ok && xfs_agfl_verify(bp);
460 484
461 if (!agfl_ok) { 485 if (!agfl_ok) {
462 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl); 486 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
463 xfs_buf_ioerror(bp, EFSCORRUPTED); 487 xfs_buf_ioerror(bp, EFSCORRUPTED);
464 } 488 }
465#endif
466} 489}
467 490
468static void 491static void
469xfs_agfl_write_verify( 492xfs_agfl_write_verify(
470 struct xfs_buf *bp) 493 struct xfs_buf *bp)
471{ 494{
472 xfs_agfl_verify(bp); 495 struct xfs_mount *mp = bp->b_target->bt_mount;
473} 496 struct xfs_buf_log_item *bip = bp->b_fspriv;
474 497
475static void 498 /* no verification of non-crc AGFLs */
476xfs_agfl_read_verify( 499 if (!xfs_sb_version_hascrc(&mp->m_sb))
477 struct xfs_buf *bp) 500 return;
478{ 501
479 xfs_agfl_verify(bp); 502 if (!xfs_agfl_verify(bp)) {
503 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
504 xfs_buf_ioerror(bp, EFSCORRUPTED);
505 return;
506 }
507
508 if (bip)
509 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
510
511 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
512 offsetof(struct xfs_agfl, agfl_crc));
480} 513}
481 514
482const struct xfs_buf_ops xfs_agfl_buf_ops = { 515const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -842,7 +875,7 @@ xfs_alloc_ag_vextent_near(
842 */ 875 */
843 int dofirst; /* set to do first algorithm */ 876 int dofirst; /* set to do first algorithm */
844 877
845 dofirst = random32() & 1; 878 dofirst = prandom_u32() & 1;
846#endif 879#endif
847 880
848restart: 881restart:
@@ -1982,18 +2015,18 @@ xfs_alloc_get_freelist(
1982 int btreeblk) /* destination is a AGF btree */ 2015 int btreeblk) /* destination is a AGF btree */
1983{ 2016{
1984 xfs_agf_t *agf; /* a.g. freespace structure */ 2017 xfs_agf_t *agf; /* a.g. freespace structure */
1985 xfs_agfl_t *agfl; /* a.g. freelist structure */
1986 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2018 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1987 xfs_agblock_t bno; /* block number returned */ 2019 xfs_agblock_t bno; /* block number returned */
2020 __be32 *agfl_bno;
1988 int error; 2021 int error;
1989 int logflags; 2022 int logflags;
1990 xfs_mount_t *mp; /* mount structure */ 2023 xfs_mount_t *mp = tp->t_mountp;
1991 xfs_perag_t *pag; /* per allocation group data */ 2024 xfs_perag_t *pag; /* per allocation group data */
1992 2025
1993 agf = XFS_BUF_TO_AGF(agbp);
1994 /* 2026 /*
1995 * Freelist is empty, give up. 2027 * Freelist is empty, give up.
1996 */ 2028 */
2029 agf = XFS_BUF_TO_AGF(agbp);
1997 if (!agf->agf_flcount) { 2030 if (!agf->agf_flcount) {
1998 *bnop = NULLAGBLOCK; 2031 *bnop = NULLAGBLOCK;
1999 return 0; 2032 return 0;
@@ -2001,15 +2034,17 @@ xfs_alloc_get_freelist(
2001 /* 2034 /*
2002 * Read the array of free blocks. 2035 * Read the array of free blocks.
2003 */ 2036 */
2004 mp = tp->t_mountp; 2037 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2005 if ((error = xfs_alloc_read_agfl(mp, tp, 2038 &agflbp);
2006 be32_to_cpu(agf->agf_seqno), &agflbp))) 2039 if (error)
2007 return error; 2040 return error;
2008 agfl = XFS_BUF_TO_AGFL(agflbp); 2041
2042
2009 /* 2043 /*
2010 * Get the block number and update the data structures. 2044 * Get the block number and update the data structures.
2011 */ 2045 */
2012 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 2046 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2047 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2013 be32_add_cpu(&agf->agf_flfirst, 1); 2048 be32_add_cpu(&agf->agf_flfirst, 1);
2014 xfs_trans_brelse(tp, agflbp); 2049 xfs_trans_brelse(tp, agflbp);
2015 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 2050 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
@@ -2058,11 +2093,14 @@ xfs_alloc_log_agf(
2058 offsetof(xfs_agf_t, agf_freeblks), 2093 offsetof(xfs_agf_t, agf_freeblks),
2059 offsetof(xfs_agf_t, agf_longest), 2094 offsetof(xfs_agf_t, agf_longest),
2060 offsetof(xfs_agf_t, agf_btreeblks), 2095 offsetof(xfs_agf_t, agf_btreeblks),
2096 offsetof(xfs_agf_t, agf_uuid),
2061 sizeof(xfs_agf_t) 2097 sizeof(xfs_agf_t)
2062 }; 2098 };
2063 2099
2064 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); 2100 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2065 2101
2102 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2103
2066 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2104 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2067 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2105 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2068} 2106}
@@ -2099,12 +2137,13 @@ xfs_alloc_put_freelist(
2099 int btreeblk) /* block came from a AGF btree */ 2137 int btreeblk) /* block came from a AGF btree */
2100{ 2138{
2101 xfs_agf_t *agf; /* a.g. freespace structure */ 2139 xfs_agf_t *agf; /* a.g. freespace structure */
2102 xfs_agfl_t *agfl; /* a.g. free block array */
2103 __be32 *blockp;/* pointer to array entry */ 2140 __be32 *blockp;/* pointer to array entry */
2104 int error; 2141 int error;
2105 int logflags; 2142 int logflags;
2106 xfs_mount_t *mp; /* mount structure */ 2143 xfs_mount_t *mp; /* mount structure */
2107 xfs_perag_t *pag; /* per allocation group data */ 2144 xfs_perag_t *pag; /* per allocation group data */
2145 __be32 *agfl_bno;
2146 int startoff;
2108 2147
2109 agf = XFS_BUF_TO_AGF(agbp); 2148 agf = XFS_BUF_TO_AGF(agbp);
2110 mp = tp->t_mountp; 2149 mp = tp->t_mountp;
@@ -2112,7 +2151,6 @@ xfs_alloc_put_freelist(
2112 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2151 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2113 be32_to_cpu(agf->agf_seqno), &agflbp))) 2152 be32_to_cpu(agf->agf_seqno), &agflbp)))
2114 return error; 2153 return error;
2115 agfl = XFS_BUF_TO_AGFL(agflbp);
2116 be32_add_cpu(&agf->agf_fllast, 1); 2154 be32_add_cpu(&agf->agf_fllast, 1);
2117 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) 2155 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2118 agf->agf_fllast = 0; 2156 agf->agf_fllast = 0;
@@ -2133,32 +2171,38 @@ xfs_alloc_put_freelist(
2133 xfs_alloc_log_agf(tp, agbp, logflags); 2171 xfs_alloc_log_agf(tp, agbp, logflags);
2134 2172
2135 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2173 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2136 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2174
2175 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2176 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2137 *blockp = cpu_to_be32(bno); 2177 *blockp = cpu_to_be32(bno);
2178 startoff = (char *)blockp - (char *)agflbp->b_addr;
2179
2138 xfs_alloc_log_agf(tp, agbp, logflags); 2180 xfs_alloc_log_agf(tp, agbp, logflags);
2139 xfs_trans_log_buf(tp, agflbp, 2181
2140 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2182 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2141 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + 2183 xfs_trans_log_buf(tp, agflbp, startoff,
2142 sizeof(xfs_agblock_t) - 1)); 2184 startoff + sizeof(xfs_agblock_t) - 1);
2143 return 0; 2185 return 0;
2144} 2186}
2145 2187
2146static void 2188static bool
2147xfs_agf_verify( 2189xfs_agf_verify(
2190 struct xfs_mount *mp,
2148 struct xfs_buf *bp) 2191 struct xfs_buf *bp)
2149 { 2192 {
2150 struct xfs_mount *mp = bp->b_target->bt_mount; 2193 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2151 struct xfs_agf *agf;
2152 int agf_ok;
2153 2194
2154 agf = XFS_BUF_TO_AGF(bp); 2195 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2196 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
2197 return false;
2155 2198
2156 agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2199 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2157 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2200 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2158 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2201 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2159 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2202 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2160 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2203 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2161 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); 2204 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2205 return false;
2162 2206
2163 /* 2207 /*
2164 * during growfs operations, the perag is not fully initialised, 2208 * during growfs operations, the perag is not fully initialised,
@@ -2166,33 +2210,58 @@ xfs_agf_verify(
2166 * use it by using uncached buffers that don't have the perag attached 2210 * use it by using uncached buffers that don't have the perag attached
2167 * so we can detect and avoid this problem. 2211 * so we can detect and avoid this problem.
2168 */ 2212 */
2169 if (bp->b_pag) 2213 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2170 agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) == 2214 return false;
2171 bp->b_pag->pag_agno;
2172 2215
2173 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 2216 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2174 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= 2217 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2175 be32_to_cpu(agf->agf_length); 2218 return false;
2219
2220 return true;;
2176 2221
2177 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2178 XFS_RANDOM_ALLOC_READ_AGF))) {
2179 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
2180 xfs_buf_ioerror(bp, EFSCORRUPTED);
2181 }
2182} 2222}
2183 2223
2184static void 2224static void
2185xfs_agf_read_verify( 2225xfs_agf_read_verify(
2186 struct xfs_buf *bp) 2226 struct xfs_buf *bp)
2187{ 2227{
2188 xfs_agf_verify(bp); 2228 struct xfs_mount *mp = bp->b_target->bt_mount;
2229 int agf_ok = 1;
2230
2231 if (xfs_sb_version_hascrc(&mp->m_sb))
2232 agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
2233 offsetof(struct xfs_agf, agf_crc));
2234
2235 agf_ok = agf_ok && xfs_agf_verify(mp, bp);
2236
2237 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2238 XFS_RANDOM_ALLOC_READ_AGF))) {
2239 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2240 xfs_buf_ioerror(bp, EFSCORRUPTED);
2241 }
2189} 2242}
2190 2243
2191static void 2244static void
2192xfs_agf_write_verify( 2245xfs_agf_write_verify(
2193 struct xfs_buf *bp) 2246 struct xfs_buf *bp)
2194{ 2247{
2195 xfs_agf_verify(bp); 2248 struct xfs_mount *mp = bp->b_target->bt_mount;
2249 struct xfs_buf_log_item *bip = bp->b_fspriv;
2250
2251 if (!xfs_agf_verify(mp, bp)) {
2252 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2253 xfs_buf_ioerror(bp, EFSCORRUPTED);
2254 return;
2255 }
2256
2257 if (!xfs_sb_version_hascrc(&mp->m_sb))
2258 return;
2259
2260 if (bip)
2261 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2262
2263 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
2264 offsetof(struct xfs_agf, agf_crc));
2196} 2265}
2197 2266
2198const struct xfs_buf_ops xfs_agf_buf_ops = { 2267const struct xfs_buf_ops xfs_agf_buf_ops = {
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index b1ddef6b2689..30c4c1434faf 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -33,6 +33,7 @@
33#include "xfs_extent_busy.h" 33#include "xfs_extent_busy.h"
34#include "xfs_error.h" 34#include "xfs_error.h"
35#include "xfs_trace.h" 35#include "xfs_trace.h"
36#include "xfs_cksum.h"
36 37
37 38
38STATIC struct xfs_btree_cur * 39STATIC struct xfs_btree_cur *
@@ -272,7 +273,7 @@ xfs_allocbt_key_diff(
272 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; 273 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
273} 274}
274 275
275static void 276static bool
276xfs_allocbt_verify( 277xfs_allocbt_verify(
277 struct xfs_buf *bp) 278 struct xfs_buf *bp)
278{ 279{
@@ -280,66 +281,103 @@ xfs_allocbt_verify(
280 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 281 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
281 struct xfs_perag *pag = bp->b_pag; 282 struct xfs_perag *pag = bp->b_pag;
282 unsigned int level; 283 unsigned int level;
283 int sblock_ok; /* block passes checks */
284 284
285 /* 285 /*
286 * magic number and level verification 286 * magic number and level verification
287 * 287 *
288 * During growfs operations, we can't verify the exact level as the 288 * During growfs operations, we can't verify the exact level or owner as
289 * perag is not fully initialised and hence not attached to the buffer. 289 * the perag is not fully initialised and hence not attached to the
290 * In this case, check against the maximum tree depth. 290 * buffer. In this case, check against the maximum tree depth.
291 *
292 * Similarly, during log recovery we will have a perag structure
293 * attached, but the agf information will not yet have been initialised
294 * from the on disk AGF. Again, we can only check against maximum limits
295 * in this case.
291 */ 296 */
292 level = be16_to_cpu(block->bb_level); 297 level = be16_to_cpu(block->bb_level);
293 switch (block->bb_magic) { 298 switch (block->bb_magic) {
299 case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
300 if (!xfs_sb_version_hascrc(&mp->m_sb))
301 return false;
302 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
303 return false;
304 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
305 return false;
306 if (pag &&
307 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
308 return false;
309 /* fall through */
294 case cpu_to_be32(XFS_ABTB_MAGIC): 310 case cpu_to_be32(XFS_ABTB_MAGIC):
295 if (pag) 311 if (pag && pag->pagf_init) {
296 sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; 312 if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
297 else 313 return false;
298 sblock_ok = level < mp->m_ag_maxlevels; 314 } else if (level >= mp->m_ag_maxlevels)
315 return false;
299 break; 316 break;
317 case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
318 if (!xfs_sb_version_hascrc(&mp->m_sb))
319 return false;
320 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
321 return false;
322 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
323 return false;
324 if (pag &&
325 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
326 return false;
327 /* fall through */
300 case cpu_to_be32(XFS_ABTC_MAGIC): 328 case cpu_to_be32(XFS_ABTC_MAGIC):
301 if (pag) 329 if (pag && pag->pagf_init) {
302 sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; 330 if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
303 else 331 return false;
304 sblock_ok = level < mp->m_ag_maxlevels; 332 } else if (level >= mp->m_ag_maxlevels)
333 return false;
305 break; 334 break;
306 default: 335 default:
307 sblock_ok = 0; 336 return false;
308 break;
309 } 337 }
310 338
311 /* numrecs verification */ 339 /* numrecs verification */
312 sblock_ok = sblock_ok && 340 if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
313 be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; 341 return false;
314 342
315 /* sibling pointer verification */ 343 /* sibling pointer verification */
316 sblock_ok = sblock_ok && 344 if (!block->bb_u.s.bb_leftsib ||
317 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 345 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
318 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && 346 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
319 block->bb_u.s.bb_leftsib && 347 return false;
320 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 348 if (!block->bb_u.s.bb_rightsib ||
321 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && 349 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
322 block->bb_u.s.bb_rightsib; 350 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
323 351 return false;
324 if (!sblock_ok) { 352
325 trace_xfs_btree_corrupt(bp, _RET_IP_); 353 return true;
326 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
327 xfs_buf_ioerror(bp, EFSCORRUPTED);
328 }
329} 354}
330 355
331static void 356static void
332xfs_allocbt_read_verify( 357xfs_allocbt_read_verify(
333 struct xfs_buf *bp) 358 struct xfs_buf *bp)
334{ 359{
335 xfs_allocbt_verify(bp); 360 if (!(xfs_btree_sblock_verify_crc(bp) &&
361 xfs_allocbt_verify(bp))) {
362 trace_xfs_btree_corrupt(bp, _RET_IP_);
363 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
364 bp->b_target->bt_mount, bp->b_addr);
365 xfs_buf_ioerror(bp, EFSCORRUPTED);
366 }
336} 367}
337 368
338static void 369static void
339xfs_allocbt_write_verify( 370xfs_allocbt_write_verify(
340 struct xfs_buf *bp) 371 struct xfs_buf *bp)
341{ 372{
342 xfs_allocbt_verify(bp); 373 if (!xfs_allocbt_verify(bp)) {
374 trace_xfs_btree_corrupt(bp, _RET_IP_);
375 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
376 bp->b_target->bt_mount, bp->b_addr);
377 xfs_buf_ioerror(bp, EFSCORRUPTED);
378 }
379 xfs_btree_sblock_calc_crc(bp);
380
343} 381}
344 382
345const struct xfs_buf_ops xfs_allocbt_buf_ops = { 383const struct xfs_buf_ops xfs_allocbt_buf_ops = {
@@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(
444 cur->bc_private.a.agbp = agbp; 482 cur->bc_private.a.agbp = agbp;
445 cur->bc_private.a.agno = agno; 483 cur->bc_private.a.agno = agno;
446 484
485 if (xfs_sb_version_hascrc(&mp->m_sb))
486 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
487
447 return cur; 488 return cur;
448} 489}
449 490
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 7e89a2b429dd..e3a3f7424192 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -31,8 +31,10 @@ struct xfs_mount;
31 * by blockcount and blockno. All blocks look the same to make the code 31 * by blockcount and blockno. All blocks look the same to make the code
32 * simpler; if we have time later, we'll make the optimizations. 32 * simpler; if we have time later, we'll make the optimizations.
33 */ 33 */
34#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ 34#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
35#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ 35#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
36#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
37#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */
36 38
37/* 39/*
38 * Data record/key structure 40 * Data record/key structure
@@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;
59 61
60/* 62/*
61 * Btree block header size depends on a superblock flag. 63 * Btree block header size depends on a superblock flag.
62 *
63 * (not quite yet, but soon)
64 */ 64 */
65#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN 65#define XFS_ALLOC_BLOCK_LEN(mp) \
66 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
67 XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
66 68
67/* 69/*
68 * Record, key, and pointer address macros for btree blocks. 70 * Record, key, and pointer address macros for btree blocks.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5f707e537171..3244c988d379 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -953,13 +953,13 @@ xfs_vm_writepage(
953 unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); 953 unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
954 954
955 /* 955 /*
956 * Just skip the page if it is fully outside i_size, e.g. due 956 * Skip the page if it is fully outside i_size, e.g. due to a
957 * to a truncate operation that is in progress. 957 * truncate operation that is in progress. We must redirty the
958 * page so that reclaim stops reclaiming it. Otherwise
959 * xfs_vm_releasepage() is called on it and gets confused.
958 */ 960 */
959 if (page->index >= end_index + 1 || offset_into_page == 0) { 961 if (page->index >= end_index + 1 || offset_into_page == 0)
960 unlock_page(page); 962 goto redirty;
961 return 0;
962 }
963 963
964 /* 964 /*
965 * The page straddles i_size. It must be zeroed out on each 965 * The page straddles i_size. It must be zeroed out on each
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 888683844d98..20fe3fe9d341 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -15,7 +15,6 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19#include "xfs.h" 18#include "xfs.h"
20#include "xfs_fs.h" 19#include "xfs_fs.h"
21#include "xfs_types.h" 20#include "xfs_types.h"
@@ -35,6 +34,7 @@
35#include "xfs_bmap.h" 34#include "xfs_bmap.h"
36#include "xfs_attr.h" 35#include "xfs_attr.h"
37#include "xfs_attr_leaf.h" 36#include "xfs_attr_leaf.h"
37#include "xfs_attr_remote.h"
38#include "xfs_error.h" 38#include "xfs_error.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
@@ -74,13 +74,6 @@ STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
74STATIC int xfs_attr_fillstate(xfs_da_state_t *state); 74STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
75STATIC int xfs_attr_refillstate(xfs_da_state_t *state); 75STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
76 76
77/*
78 * Routines to manipulate out-of-line attribute values.
79 */
80STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
81STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
82
83#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
84 77
85STATIC int 78STATIC int
86xfs_attr_name_to_xname( 79xfs_attr_name_to_xname(
@@ -820,7 +813,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
820 error = 0; 813 error = 0;
821 goto out; 814 goto out;
822 } 815 }
823 error = xfs_attr_root_inactive(&trans, dp); 816 error = xfs_attr3_root_inactive(&trans, dp);
824 if (error) 817 if (error)
825 goto out; 818 goto out;
826 819
@@ -906,7 +899,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
906 */ 899 */
907 dp = args->dp; 900 dp = args->dp;
908 args->blkno = 0; 901 args->blkno = 0;
909 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 902 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
910 if (error) 903 if (error)
911 return error; 904 return error;
912 905
@@ -914,14 +907,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
914 * Look up the given attribute in the leaf block. Figure out if 907 * Look up the given attribute in the leaf block. Figure out if
915 * the given flags produce an error or call for an atomic rename. 908 * the given flags produce an error or call for an atomic rename.
916 */ 909 */
917 retval = xfs_attr_leaf_lookup_int(bp, args); 910 retval = xfs_attr3_leaf_lookup_int(bp, args);
918 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { 911 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
919 xfs_trans_brelse(args->trans, bp); 912 xfs_trans_brelse(args->trans, bp);
920 return(retval); 913 return retval;
921 } else if (retval == EEXIST) { 914 } else if (retval == EEXIST) {
922 if (args->flags & ATTR_CREATE) { /* pure create op */ 915 if (args->flags & ATTR_CREATE) { /* pure create op */
923 xfs_trans_brelse(args->trans, bp); 916 xfs_trans_brelse(args->trans, bp);
924 return(retval); 917 return retval;
925 } 918 }
926 919
927 trace_xfs_attr_leaf_replace(args); 920 trace_xfs_attr_leaf_replace(args);
@@ -937,7 +930,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
937 * Add the attribute to the leaf block, transitioning to a Btree 930 * Add the attribute to the leaf block, transitioning to a Btree
938 * if required. 931 * if required.
939 */ 932 */
940 retval = xfs_attr_leaf_add(bp, args); 933 retval = xfs_attr3_leaf_add(bp, args);
941 if (retval == ENOSPC) { 934 if (retval == ENOSPC) {
942 /* 935 /*
943 * Promote the attribute list to the Btree format, then 936 * Promote the attribute list to the Btree format, then
@@ -945,7 +938,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
945 * can manage its own transactions. 938 * can manage its own transactions.
946 */ 939 */
947 xfs_bmap_init(args->flist, args->firstblock); 940 xfs_bmap_init(args->flist, args->firstblock);
948 error = xfs_attr_leaf_to_node(args); 941 error = xfs_attr3_leaf_to_node(args);
949 if (!error) { 942 if (!error) {
950 error = xfs_bmap_finish(&args->trans, args->flist, 943 error = xfs_bmap_finish(&args->trans, args->flist,
951 &committed); 944 &committed);
@@ -1010,7 +1003,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1010 * In a separate transaction, set the incomplete flag on the 1003 * In a separate transaction, set the incomplete flag on the
1011 * "old" attr and clear the incomplete flag on the "new" attr. 1004 * "old" attr and clear the incomplete flag on the "new" attr.
1012 */ 1005 */
1013 error = xfs_attr_leaf_flipflags(args); 1006 error = xfs_attr3_leaf_flipflags(args);
1014 if (error) 1007 if (error)
1015 return(error); 1008 return(error);
1016 1009
@@ -1032,19 +1025,19 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1032 * Read in the block containing the "old" attr, then 1025 * Read in the block containing the "old" attr, then
1033 * remove the "old" attr from that block (neat, huh!) 1026 * remove the "old" attr from that block (neat, huh!)
1034 */ 1027 */
1035 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, 1028 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
1036 -1, &bp); 1029 -1, &bp);
1037 if (error) 1030 if (error)
1038 return error; 1031 return error;
1039 1032
1040 xfs_attr_leaf_remove(bp, args); 1033 xfs_attr3_leaf_remove(bp, args);
1041 1034
1042 /* 1035 /*
1043 * If the result is small enough, shrink it all into the inode. 1036 * If the result is small enough, shrink it all into the inode.
1044 */ 1037 */
1045 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1038 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1046 xfs_bmap_init(args->flist, args->firstblock); 1039 xfs_bmap_init(args->flist, args->firstblock);
1047 error = xfs_attr_leaf_to_shortform(bp, args, forkoff); 1040 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1048 /* bp is gone due to xfs_da_shrink_inode */ 1041 /* bp is gone due to xfs_da_shrink_inode */
1049 if (!error) { 1042 if (!error) {
1050 error = xfs_bmap_finish(&args->trans, 1043 error = xfs_bmap_finish(&args->trans,
@@ -1076,9 +1069,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1076 /* 1069 /*
1077 * Added a "remote" value, just clear the incomplete flag. 1070 * Added a "remote" value, just clear the incomplete flag.
1078 */ 1071 */
1079 error = xfs_attr_leaf_clearflag(args); 1072 error = xfs_attr3_leaf_clearflag(args);
1080 } 1073 }
1081 return(error); 1074 return error;
1082} 1075}
1083 1076
1084/* 1077/*
@@ -1101,24 +1094,24 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1101 */ 1094 */
1102 dp = args->dp; 1095 dp = args->dp;
1103 args->blkno = 0; 1096 args->blkno = 0;
1104 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 1097 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
1105 if (error) 1098 if (error)
1106 return error; 1099 return error;
1107 1100
1108 error = xfs_attr_leaf_lookup_int(bp, args); 1101 error = xfs_attr3_leaf_lookup_int(bp, args);
1109 if (error == ENOATTR) { 1102 if (error == ENOATTR) {
1110 xfs_trans_brelse(args->trans, bp); 1103 xfs_trans_brelse(args->trans, bp);
1111 return(error); 1104 return error;
1112 } 1105 }
1113 1106
1114 xfs_attr_leaf_remove(bp, args); 1107 xfs_attr3_leaf_remove(bp, args);
1115 1108
1116 /* 1109 /*
1117 * If the result is small enough, shrink it all into the inode. 1110 * If the result is small enough, shrink it all into the inode.
1118 */ 1111 */
1119 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1112 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1120 xfs_bmap_init(args->flist, args->firstblock); 1113 xfs_bmap_init(args->flist, args->firstblock);
1121 error = xfs_attr_leaf_to_shortform(bp, args, forkoff); 1114 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1122 /* bp is gone due to xfs_da_shrink_inode */ 1115 /* bp is gone due to xfs_da_shrink_inode */
1123 if (!error) { 1116 if (!error) {
1124 error = xfs_bmap_finish(&args->trans, args->flist, 1117 error = xfs_bmap_finish(&args->trans, args->flist,
@@ -1128,7 +1121,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1128 ASSERT(committed); 1121 ASSERT(committed);
1129 args->trans = NULL; 1122 args->trans = NULL;
1130 xfs_bmap_cancel(args->flist); 1123 xfs_bmap_cancel(args->flist);
1131 return(error); 1124 return error;
1132 } 1125 }
1133 1126
1134 /* 1127 /*
@@ -1138,7 +1131,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1138 if (committed) 1131 if (committed)
1139 xfs_trans_ijoin(args->trans, dp, 0); 1132 xfs_trans_ijoin(args->trans, dp, 0);
1140 } 1133 }
1141 return(0); 1134 return 0;
1142} 1135}
1143 1136
1144/* 1137/*
@@ -1156,21 +1149,21 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
1156 trace_xfs_attr_leaf_get(args); 1149 trace_xfs_attr_leaf_get(args);
1157 1150
1158 args->blkno = 0; 1151 args->blkno = 0;
1159 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 1152 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
1160 if (error) 1153 if (error)
1161 return error; 1154 return error;
1162 1155
1163 error = xfs_attr_leaf_lookup_int(bp, args); 1156 error = xfs_attr3_leaf_lookup_int(bp, args);
1164 if (error != EEXIST) { 1157 if (error != EEXIST) {
1165 xfs_trans_brelse(args->trans, bp); 1158 xfs_trans_brelse(args->trans, bp);
1166 return(error); 1159 return error;
1167 } 1160 }
1168 error = xfs_attr_leaf_getvalue(bp, args); 1161 error = xfs_attr3_leaf_getvalue(bp, args);
1169 xfs_trans_brelse(args->trans, bp); 1162 xfs_trans_brelse(args->trans, bp);
1170 if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { 1163 if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1171 error = xfs_attr_rmtval_get(args); 1164 error = xfs_attr_rmtval_get(args);
1172 } 1165 }
1173 return(error); 1166 return error;
1174} 1167}
1175 1168
1176/* 1169/*
@@ -1185,11 +1178,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1185 trace_xfs_attr_leaf_list(context); 1178 trace_xfs_attr_leaf_list(context);
1186 1179
1187 context->cursor->blkno = 0; 1180 context->cursor->blkno = 0;
1188 error = xfs_attr_leaf_read(NULL, context->dp, 0, -1, &bp); 1181 error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
1189 if (error) 1182 if (error)
1190 return XFS_ERROR(error); 1183 return XFS_ERROR(error);
1191 1184
1192 error = xfs_attr_leaf_list_int(bp, context); 1185 error = xfs_attr3_leaf_list_int(bp, context);
1193 xfs_trans_brelse(NULL, bp); 1186 xfs_trans_brelse(NULL, bp);
1194 return XFS_ERROR(error); 1187 return XFS_ERROR(error);
1195} 1188}
@@ -1236,7 +1229,7 @@ restart:
1236 * Search to see if name already exists, and get back a pointer 1229 * Search to see if name already exists, and get back a pointer
1237 * to where it should go. 1230 * to where it should go.
1238 */ 1231 */
1239 error = xfs_da_node_lookup_int(state, &retval); 1232 error = xfs_da3_node_lookup_int(state, &retval);
1240 if (error) 1233 if (error)
1241 goto out; 1234 goto out;
1242 blk = &state->path.blk[ state->path.active-1 ]; 1235 blk = &state->path.blk[ state->path.active-1 ];
@@ -1258,7 +1251,7 @@ restart:
1258 args->rmtblkcnt = 0; 1251 args->rmtblkcnt = 0;
1259 } 1252 }
1260 1253
1261 retval = xfs_attr_leaf_add(blk->bp, state->args); 1254 retval = xfs_attr3_leaf_add(blk->bp, state->args);
1262 if (retval == ENOSPC) { 1255 if (retval == ENOSPC) {
1263 if (state->path.active == 1) { 1256 if (state->path.active == 1) {
1264 /* 1257 /*
@@ -1268,7 +1261,7 @@ restart:
1268 */ 1261 */
1269 xfs_da_state_free(state); 1262 xfs_da_state_free(state);
1270 xfs_bmap_init(args->flist, args->firstblock); 1263 xfs_bmap_init(args->flist, args->firstblock);
1271 error = xfs_attr_leaf_to_node(args); 1264 error = xfs_attr3_leaf_to_node(args);
1272 if (!error) { 1265 if (!error) {
1273 error = xfs_bmap_finish(&args->trans, 1266 error = xfs_bmap_finish(&args->trans,
1274 args->flist, 1267 args->flist,
@@ -1307,7 +1300,7 @@ restart:
1307 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. 1300 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1308 */ 1301 */
1309 xfs_bmap_init(args->flist, args->firstblock); 1302 xfs_bmap_init(args->flist, args->firstblock);
1310 error = xfs_da_split(state); 1303 error = xfs_da3_split(state);
1311 if (!error) { 1304 if (!error) {
1312 error = xfs_bmap_finish(&args->trans, args->flist, 1305 error = xfs_bmap_finish(&args->trans, args->flist,
1313 &committed); 1306 &committed);
@@ -1329,7 +1322,7 @@ restart:
1329 /* 1322 /*
1330 * Addition succeeded, update Btree hashvals. 1323 * Addition succeeded, update Btree hashvals.
1331 */ 1324 */
1332 xfs_da_fixhashpath(state, &state->path); 1325 xfs_da3_fixhashpath(state, &state->path);
1333 } 1326 }
1334 1327
1335 /* 1328 /*
@@ -1370,7 +1363,7 @@ restart:
1370 * In a separate transaction, set the incomplete flag on the 1363 * In a separate transaction, set the incomplete flag on the
1371 * "old" attr and clear the incomplete flag on the "new" attr. 1364 * "old" attr and clear the incomplete flag on the "new" attr.
1372 */ 1365 */
1373 error = xfs_attr_leaf_flipflags(args); 1366 error = xfs_attr3_leaf_flipflags(args);
1374 if (error) 1367 if (error)
1375 goto out; 1368 goto out;
1376 1369
@@ -1400,7 +1393,7 @@ restart:
1400 state->blocksize = state->mp->m_sb.sb_blocksize; 1393 state->blocksize = state->mp->m_sb.sb_blocksize;
1401 state->node_ents = state->mp->m_attr_node_ents; 1394 state->node_ents = state->mp->m_attr_node_ents;
1402 state->inleaf = 0; 1395 state->inleaf = 0;
1403 error = xfs_da_node_lookup_int(state, &retval); 1396 error = xfs_da3_node_lookup_int(state, &retval);
1404 if (error) 1397 if (error)
1405 goto out; 1398 goto out;
1406 1399
@@ -1409,15 +1402,15 @@ restart:
1409 */ 1402 */
1410 blk = &state->path.blk[ state->path.active-1 ]; 1403 blk = &state->path.blk[ state->path.active-1 ];
1411 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 1404 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1412 error = xfs_attr_leaf_remove(blk->bp, args); 1405 error = xfs_attr3_leaf_remove(blk->bp, args);
1413 xfs_da_fixhashpath(state, &state->path); 1406 xfs_da3_fixhashpath(state, &state->path);
1414 1407
1415 /* 1408 /*
1416 * Check to see if the tree needs to be collapsed. 1409 * Check to see if the tree needs to be collapsed.
1417 */ 1410 */
1418 if (retval && (state->path.active > 1)) { 1411 if (retval && (state->path.active > 1)) {
1419 xfs_bmap_init(args->flist, args->firstblock); 1412 xfs_bmap_init(args->flist, args->firstblock);
1420 error = xfs_da_join(state); 1413 error = xfs_da3_join(state);
1421 if (!error) { 1414 if (!error) {
1422 error = xfs_bmap_finish(&args->trans, 1415 error = xfs_bmap_finish(&args->trans,
1423 args->flist, 1416 args->flist,
@@ -1450,7 +1443,7 @@ restart:
1450 /* 1443 /*
1451 * Added a "remote" value, just clear the incomplete flag. 1444 * Added a "remote" value, just clear the incomplete flag.
1452 */ 1445 */
1453 error = xfs_attr_leaf_clearflag(args); 1446 error = xfs_attr3_leaf_clearflag(args);
1454 if (error) 1447 if (error)
1455 goto out; 1448 goto out;
1456 } 1449 }
@@ -1495,7 +1488,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1495 /* 1488 /*
1496 * Search to see if name exists, and get back a pointer to it. 1489 * Search to see if name exists, and get back a pointer to it.
1497 */ 1490 */
1498 error = xfs_da_node_lookup_int(state, &retval); 1491 error = xfs_da3_node_lookup_int(state, &retval);
1499 if (error || (retval != EEXIST)) { 1492 if (error || (retval != EEXIST)) {
1500 if (error == 0) 1493 if (error == 0)
1501 error = retval; 1494 error = retval;
@@ -1524,7 +1517,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1524 * Mark the attribute as INCOMPLETE, then bunmapi() the 1517 * Mark the attribute as INCOMPLETE, then bunmapi() the
1525 * remote value. 1518 * remote value.
1526 */ 1519 */
1527 error = xfs_attr_leaf_setflag(args); 1520 error = xfs_attr3_leaf_setflag(args);
1528 if (error) 1521 if (error)
1529 goto out; 1522 goto out;
1530 error = xfs_attr_rmtval_remove(args); 1523 error = xfs_attr_rmtval_remove(args);
@@ -1545,15 +1538,15 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1545 */ 1538 */
1546 blk = &state->path.blk[ state->path.active-1 ]; 1539 blk = &state->path.blk[ state->path.active-1 ];
1547 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 1540 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1548 retval = xfs_attr_leaf_remove(blk->bp, args); 1541 retval = xfs_attr3_leaf_remove(blk->bp, args);
1549 xfs_da_fixhashpath(state, &state->path); 1542 xfs_da3_fixhashpath(state, &state->path);
1550 1543
1551 /* 1544 /*
1552 * Check to see if the tree needs to be collapsed. 1545 * Check to see if the tree needs to be collapsed.
1553 */ 1546 */
1554 if (retval && (state->path.active > 1)) { 1547 if (retval && (state->path.active > 1)) {
1555 xfs_bmap_init(args->flist, args->firstblock); 1548 xfs_bmap_init(args->flist, args->firstblock);
1556 error = xfs_da_join(state); 1549 error = xfs_da3_join(state);
1557 if (!error) { 1550 if (!error) {
1558 error = xfs_bmap_finish(&args->trans, args->flist, 1551 error = xfs_bmap_finish(&args->trans, args->flist,
1559 &committed); 1552 &committed);
@@ -1591,13 +1584,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1591 ASSERT(state->path.blk[0].bp); 1584 ASSERT(state->path.blk[0].bp);
1592 state->path.blk[0].bp = NULL; 1585 state->path.blk[0].bp = NULL;
1593 1586
1594 error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp); 1587 error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1595 if (error) 1588 if (error)
1596 goto out; 1589 goto out;
1597 1590
1598 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1591 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1599 xfs_bmap_init(args->flist, args->firstblock); 1592 xfs_bmap_init(args->flist, args->firstblock);
1600 error = xfs_attr_leaf_to_shortform(bp, args, forkoff); 1593 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1601 /* bp is gone due to xfs_da_shrink_inode */ 1594 /* bp is gone due to xfs_da_shrink_inode */
1602 if (!error) { 1595 if (!error) {
1603 error = xfs_bmap_finish(&args->trans, 1596 error = xfs_bmap_finish(&args->trans,
@@ -1699,7 +1692,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
1699 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1692 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1700 for (blk = path->blk, level = 0; level < path->active; blk++, level++) { 1693 for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1701 if (blk->disk_blkno) { 1694 if (blk->disk_blkno) {
1702 error = xfs_da_node_read(state->args->trans, 1695 error = xfs_da3_node_read(state->args->trans,
1703 state->args->dp, 1696 state->args->dp,
1704 blk->blkno, blk->disk_blkno, 1697 blk->blkno, blk->disk_blkno,
1705 &blk->bp, XFS_ATTR_FORK); 1698 &blk->bp, XFS_ATTR_FORK);
@@ -1718,7 +1711,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
1718 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1711 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1719 for (blk = path->blk, level = 0; level < path->active; blk++, level++) { 1712 for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1720 if (blk->disk_blkno) { 1713 if (blk->disk_blkno) {
1721 error = xfs_da_node_read(state->args->trans, 1714 error = xfs_da3_node_read(state->args->trans,
1722 state->args->dp, 1715 state->args->dp,
1723 blk->blkno, blk->disk_blkno, 1716 blk->blkno, blk->disk_blkno,
1724 &blk->bp, XFS_ATTR_FORK); 1717 &blk->bp, XFS_ATTR_FORK);
@@ -1758,7 +1751,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
1758 /* 1751 /*
1759 * Search to see if name exists, and get back a pointer to it. 1752 * Search to see if name exists, and get back a pointer to it.
1760 */ 1753 */
1761 error = xfs_da_node_lookup_int(state, &retval); 1754 error = xfs_da3_node_lookup_int(state, &retval);
1762 if (error) { 1755 if (error) {
1763 retval = error; 1756 retval = error;
1764 } else if (retval == EEXIST) { 1757 } else if (retval == EEXIST) {
@@ -1769,7 +1762,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
1769 /* 1762 /*
1770 * Get the value, local or "remote" 1763 * Get the value, local or "remote"
1771 */ 1764 */
1772 retval = xfs_attr_leaf_getvalue(blk->bp, args); 1765 retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1773 if (!retval && (args->rmtblkno > 0) 1766 if (!retval && (args->rmtblkno > 0)
1774 && !(args->flags & ATTR_KERNOVAL)) { 1767 && !(args->flags & ATTR_KERNOVAL)) {
1775 retval = xfs_attr_rmtval_get(args); 1768 retval = xfs_attr_rmtval_get(args);
@@ -1794,7 +1787,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1794 attrlist_cursor_kern_t *cursor; 1787 attrlist_cursor_kern_t *cursor;
1795 xfs_attr_leafblock_t *leaf; 1788 xfs_attr_leafblock_t *leaf;
1796 xfs_da_intnode_t *node; 1789 xfs_da_intnode_t *node;
1797 xfs_da_node_entry_t *btree; 1790 struct xfs_attr3_icleaf_hdr leafhdr;
1791 struct xfs_da3_icnode_hdr nodehdr;
1792 struct xfs_da_node_entry *btree;
1798 int error, i; 1793 int error, i;
1799 struct xfs_buf *bp; 1794 struct xfs_buf *bp;
1800 1795
@@ -1810,27 +1805,33 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1810 */ 1805 */
1811 bp = NULL; 1806 bp = NULL;
1812 if (cursor->blkno > 0) { 1807 if (cursor->blkno > 0) {
1813 error = xfs_da_node_read(NULL, context->dp, cursor->blkno, -1, 1808 error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
1814 &bp, XFS_ATTR_FORK); 1809 &bp, XFS_ATTR_FORK);
1815 if ((error != 0) && (error != EFSCORRUPTED)) 1810 if ((error != 0) && (error != EFSCORRUPTED))
1816 return(error); 1811 return(error);
1817 if (bp) { 1812 if (bp) {
1813 struct xfs_attr_leaf_entry *entries;
1814
1818 node = bp->b_addr; 1815 node = bp->b_addr;
1819 switch (be16_to_cpu(node->hdr.info.magic)) { 1816 switch (be16_to_cpu(node->hdr.info.magic)) {
1820 case XFS_DA_NODE_MAGIC: 1817 case XFS_DA_NODE_MAGIC:
1818 case XFS_DA3_NODE_MAGIC:
1821 trace_xfs_attr_list_wrong_blk(context); 1819 trace_xfs_attr_list_wrong_blk(context);
1822 xfs_trans_brelse(NULL, bp); 1820 xfs_trans_brelse(NULL, bp);
1823 bp = NULL; 1821 bp = NULL;
1824 break; 1822 break;
1825 case XFS_ATTR_LEAF_MAGIC: 1823 case XFS_ATTR_LEAF_MAGIC:
1824 case XFS_ATTR3_LEAF_MAGIC:
1826 leaf = bp->b_addr; 1825 leaf = bp->b_addr;
1827 if (cursor->hashval > be32_to_cpu(leaf->entries[ 1826 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
1828 be16_to_cpu(leaf->hdr.count)-1].hashval)) { 1827 entries = xfs_attr3_leaf_entryp(leaf);
1828 if (cursor->hashval > be32_to_cpu(
1829 entries[leafhdr.count - 1].hashval)) {
1829 trace_xfs_attr_list_wrong_blk(context); 1830 trace_xfs_attr_list_wrong_blk(context);
1830 xfs_trans_brelse(NULL, bp); 1831 xfs_trans_brelse(NULL, bp);
1831 bp = NULL; 1832 bp = NULL;
1832 } else if (cursor->hashval <= 1833 } else if (cursor->hashval <= be32_to_cpu(
1833 be32_to_cpu(leaf->entries[0].hashval)) { 1834 entries[0].hashval)) {
1834 trace_xfs_attr_list_wrong_blk(context); 1835 trace_xfs_attr_list_wrong_blk(context);
1835 xfs_trans_brelse(NULL, bp); 1836 xfs_trans_brelse(NULL, bp);
1836 bp = NULL; 1837 bp = NULL;
@@ -1852,27 +1853,31 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1852 if (bp == NULL) { 1853 if (bp == NULL) {
1853 cursor->blkno = 0; 1854 cursor->blkno = 0;
1854 for (;;) { 1855 for (;;) {
1855 error = xfs_da_node_read(NULL, context->dp, 1856 __uint16_t magic;
1857
1858 error = xfs_da3_node_read(NULL, context->dp,
1856 cursor->blkno, -1, &bp, 1859 cursor->blkno, -1, &bp,
1857 XFS_ATTR_FORK); 1860 XFS_ATTR_FORK);
1858 if (error) 1861 if (error)
1859 return(error); 1862 return(error);
1860 node = bp->b_addr; 1863 node = bp->b_addr;
1861 if (node->hdr.info.magic == 1864 magic = be16_to_cpu(node->hdr.info.magic);
1862 cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) 1865 if (magic == XFS_ATTR_LEAF_MAGIC ||
1866 magic == XFS_ATTR3_LEAF_MAGIC)
1863 break; 1867 break;
1864 if (unlikely(node->hdr.info.magic != 1868 if (magic != XFS_DA_NODE_MAGIC &&
1865 cpu_to_be16(XFS_DA_NODE_MAGIC))) { 1869 magic != XFS_DA3_NODE_MAGIC) {
1866 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", 1870 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1867 XFS_ERRLEVEL_LOW, 1871 XFS_ERRLEVEL_LOW,
1868 context->dp->i_mount, 1872 context->dp->i_mount,
1869 node); 1873 node);
1870 xfs_trans_brelse(NULL, bp); 1874 xfs_trans_brelse(NULL, bp);
1871 return(XFS_ERROR(EFSCORRUPTED)); 1875 return XFS_ERROR(EFSCORRUPTED);
1872 } 1876 }
1873 btree = node->btree; 1877
1874 for (i = 0; i < be16_to_cpu(node->hdr.count); 1878 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1875 btree++, i++) { 1879 btree = xfs_da3_node_tree_p(node);
1880 for (i = 0; i < nodehdr.count; btree++, i++) {
1876 if (cursor->hashval 1881 if (cursor->hashval
1877 <= be32_to_cpu(btree->hashval)) { 1882 <= be32_to_cpu(btree->hashval)) {
1878 cursor->blkno = be32_to_cpu(btree->before); 1883 cursor->blkno = be32_to_cpu(btree->before);
@@ -1881,9 +1886,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1881 break; 1886 break;
1882 } 1887 }
1883 } 1888 }
1884 if (i == be16_to_cpu(node->hdr.count)) { 1889 if (i == nodehdr.count) {
1885 xfs_trans_brelse(NULL, bp); 1890 xfs_trans_brelse(NULL, bp);
1886 return(0); 1891 return 0;
1887 } 1892 }
1888 xfs_trans_brelse(NULL, bp); 1893 xfs_trans_brelse(NULL, bp);
1889 } 1894 }
@@ -1897,310 +1902,21 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1897 */ 1902 */
1898 for (;;) { 1903 for (;;) {
1899 leaf = bp->b_addr; 1904 leaf = bp->b_addr;
1900 error = xfs_attr_leaf_list_int(bp, context); 1905 error = xfs_attr3_leaf_list_int(bp, context);
1901 if (error) { 1906 if (error) {
1902 xfs_trans_brelse(NULL, bp); 1907 xfs_trans_brelse(NULL, bp);
1903 return error; 1908 return error;
1904 } 1909 }
1905 if (context->seen_enough || leaf->hdr.info.forw == 0) 1910 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
1911 if (context->seen_enough || leafhdr.forw == 0)
1906 break; 1912 break;
1907 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); 1913 cursor->blkno = leafhdr.forw;
1908 xfs_trans_brelse(NULL, bp); 1914 xfs_trans_brelse(NULL, bp);
1909 error = xfs_attr_leaf_read(NULL, context->dp, cursor->blkno, -1, 1915 error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
1910 &bp); 1916 &bp);
1911 if (error) 1917 if (error)
1912 return error; 1918 return error;
1913 } 1919 }
1914 xfs_trans_brelse(NULL, bp); 1920 xfs_trans_brelse(NULL, bp);
1915 return(0); 1921 return 0;
1916}
1917
1918
1919/*========================================================================
1920 * External routines for manipulating out-of-line attribute values.
1921 *========================================================================*/
1922
1923/*
1924 * Read the value associated with an attribute from the out-of-line buffer
1925 * that we stored it in.
1926 */
1927int
1928xfs_attr_rmtval_get(xfs_da_args_t *args)
1929{
1930 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1931 xfs_mount_t *mp;
1932 xfs_daddr_t dblkno;
1933 void *dst;
1934 xfs_buf_t *bp;
1935 int nmap, error, tmp, valuelen, blkcnt, i;
1936 xfs_dablk_t lblkno;
1937
1938 trace_xfs_attr_rmtval_get(args);
1939
1940 ASSERT(!(args->flags & ATTR_KERNOVAL));
1941
1942 mp = args->dp->i_mount;
1943 dst = args->value;
1944 valuelen = args->valuelen;
1945 lblkno = args->rmtblkno;
1946 while (valuelen > 0) {
1947 nmap = ATTR_RMTVALUE_MAPSIZE;
1948 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
1949 args->rmtblkcnt, map, &nmap,
1950 XFS_BMAPI_ATTRFORK);
1951 if (error)
1952 return(error);
1953 ASSERT(nmap >= 1);
1954
1955 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
1956 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
1957 (map[i].br_startblock != HOLESTARTBLOCK));
1958 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
1959 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
1960 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1961 dblkno, blkcnt, 0, &bp, NULL);
1962 if (error)
1963 return(error);
1964
1965 tmp = min_t(int, valuelen, BBTOB(bp->b_length));
1966 xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
1967 xfs_buf_relse(bp);
1968 dst += tmp;
1969 valuelen -= tmp;
1970
1971 lblkno += map[i].br_blockcount;
1972 }
1973 }
1974 ASSERT(valuelen == 0);
1975 return(0);
1976}
1977
1978/*
1979 * Write the value associated with an attribute into the out-of-line buffer
1980 * that we have defined for it.
1981 */
1982STATIC int
1983xfs_attr_rmtval_set(xfs_da_args_t *args)
1984{
1985 xfs_mount_t *mp;
1986 xfs_fileoff_t lfileoff;
1987 xfs_inode_t *dp;
1988 xfs_bmbt_irec_t map;
1989 xfs_daddr_t dblkno;
1990 void *src;
1991 xfs_buf_t *bp;
1992 xfs_dablk_t lblkno;
1993 int blkcnt, valuelen, nmap, error, tmp, committed;
1994
1995 trace_xfs_attr_rmtval_set(args);
1996
1997 dp = args->dp;
1998 mp = dp->i_mount;
1999 src = args->value;
2000
2001 /*
2002 * Find a "hole" in the attribute address space large enough for
2003 * us to drop the new attribute's value into.
2004 */
2005 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2006 lfileoff = 0;
2007 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2008 XFS_ATTR_FORK);
2009 if (error) {
2010 return(error);
2011 }
2012 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2013 args->rmtblkcnt = blkcnt;
2014
2015 /*
2016 * Roll through the "value", allocating blocks on disk as required.
2017 */
2018 while (blkcnt > 0) {
2019 /*
2020 * Allocate a single extent, up to the size of the value.
2021 */
2022 xfs_bmap_init(args->flist, args->firstblock);
2023 nmap = 1;
2024 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
2025 blkcnt,
2026 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2027 args->firstblock, args->total, &map, &nmap,
2028 args->flist);
2029 if (!error) {
2030 error = xfs_bmap_finish(&args->trans, args->flist,
2031 &committed);
2032 }
2033 if (error) {
2034 ASSERT(committed);
2035 args->trans = NULL;
2036 xfs_bmap_cancel(args->flist);
2037 return(error);
2038 }
2039
2040 /*
2041 * bmap_finish() may have committed the last trans and started
2042 * a new one. We need the inode to be in all transactions.
2043 */
2044 if (committed)
2045 xfs_trans_ijoin(args->trans, dp, 0);
2046
2047 ASSERT(nmap == 1);
2048 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2049 (map.br_startblock != HOLESTARTBLOCK));
2050 lblkno += map.br_blockcount;
2051 blkcnt -= map.br_blockcount;
2052
2053 /*
2054 * Start the next trans in the chain.
2055 */
2056 error = xfs_trans_roll(&args->trans, dp);
2057 if (error)
2058 return (error);
2059 }
2060
2061 /*
2062 * Roll through the "value", copying the attribute value to the
2063 * already-allocated blocks. Blocks are written synchronously
2064 * so that we can know they are all on disk before we turn off
2065 * the INCOMPLETE flag.
2066 */
2067 lblkno = args->rmtblkno;
2068 valuelen = args->valuelen;
2069 while (valuelen > 0) {
2070 int buflen;
2071
2072 /*
2073 * Try to remember where we decided to put the value.
2074 */
2075 xfs_bmap_init(args->flist, args->firstblock);
2076 nmap = 1;
2077 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
2078 args->rmtblkcnt, &map, &nmap,
2079 XFS_BMAPI_ATTRFORK);
2080 if (error)
2081 return(error);
2082 ASSERT(nmap == 1);
2083 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2084 (map.br_startblock != HOLESTARTBLOCK));
2085
2086 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2087 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2088
2089 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
2090 if (!bp)
2091 return ENOMEM;
2092
2093 buflen = BBTOB(bp->b_length);
2094 tmp = min_t(int, valuelen, buflen);
2095 xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
2096 if (tmp < buflen)
2097 xfs_buf_zero(bp, tmp, buflen - tmp);
2098
2099 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
2100 xfs_buf_relse(bp);
2101 if (error)
2102 return error;
2103 src += tmp;
2104 valuelen -= tmp;
2105
2106 lblkno += map.br_blockcount;
2107 }
2108 ASSERT(valuelen == 0);
2109 return(0);
2110}
2111
2112/*
2113 * Remove the value associated with an attribute by deleting the
2114 * out-of-line buffer that it is stored on.
2115 */
2116STATIC int
2117xfs_attr_rmtval_remove(xfs_da_args_t *args)
2118{
2119 xfs_mount_t *mp;
2120 xfs_bmbt_irec_t map;
2121 xfs_buf_t *bp;
2122 xfs_daddr_t dblkno;
2123 xfs_dablk_t lblkno;
2124 int valuelen, blkcnt, nmap, error, done, committed;
2125
2126 trace_xfs_attr_rmtval_remove(args);
2127
2128 mp = args->dp->i_mount;
2129
2130 /*
2131 * Roll through the "value", invalidating the attribute value's
2132 * blocks.
2133 */
2134 lblkno = args->rmtblkno;
2135 valuelen = args->rmtblkcnt;
2136 while (valuelen > 0) {
2137 /*
2138 * Try to remember where we decided to put the value.
2139 */
2140 nmap = 1;
2141 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
2142 args->rmtblkcnt, &map, &nmap,
2143 XFS_BMAPI_ATTRFORK);
2144 if (error)
2145 return(error);
2146 ASSERT(nmap == 1);
2147 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2148 (map.br_startblock != HOLESTARTBLOCK));
2149
2150 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2151 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2152
2153 /*
2154 * If the "remote" value is in the cache, remove it.
2155 */
2156 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
2157 if (bp) {
2158 xfs_buf_stale(bp);
2159 xfs_buf_relse(bp);
2160 bp = NULL;
2161 }
2162
2163 valuelen -= map.br_blockcount;
2164
2165 lblkno += map.br_blockcount;
2166 }
2167
2168 /*
2169 * Keep de-allocating extents until the remote-value region is gone.
2170 */
2171 lblkno = args->rmtblkno;
2172 blkcnt = args->rmtblkcnt;
2173 done = 0;
2174 while (!done) {
2175 xfs_bmap_init(args->flist, args->firstblock);
2176 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2177 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2178 1, args->firstblock, args->flist,
2179 &done);
2180 if (!error) {
2181 error = xfs_bmap_finish(&args->trans, args->flist,
2182 &committed);
2183 }
2184 if (error) {
2185 ASSERT(committed);
2186 args->trans = NULL;
2187 xfs_bmap_cancel(args->flist);
2188 return(error);
2189 }
2190
2191 /*
2192 * bmap_finish() may have committed the last trans and started
2193 * a new one. We need the inode to be in all transactions.
2194 */
2195 if (committed)
2196 xfs_trans_ijoin(args->trans, args->dp, 0);
2197
2198 /*
2199 * Close out trans and start the next one in the chain.
2200 */
2201 error = xfs_trans_roll(&args->trans, args->dp);
2202 if (error)
2203 return (error);
2204 }
2205 return(0);
2206} 1922}
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index e920d68ef509..de8dd58da46c 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -140,7 +140,6 @@ typedef struct xfs_attr_list_context {
140 * Overall external interface routines. 140 * Overall external interface routines.
141 */ 141 */
142int xfs_attr_inactive(struct xfs_inode *dp); 142int xfs_attr_inactive(struct xfs_inode *dp);
143int xfs_attr_rmtval_get(struct xfs_da_args *args);
144int xfs_attr_list_int(struct xfs_attr_list_context *); 143int xfs_attr_list_int(struct xfs_attr_list_context *);
145 144
146#endif /* __XFS_ATTR_H__ */ 145#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index ee24993c7d12..08d5457c948e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -31,6 +32,7 @@
31#include "xfs_alloc.h" 32#include "xfs_alloc.h"
32#include "xfs_btree.h" 33#include "xfs_btree.h"
33#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
35#include "xfs_attr_remote.h"
34#include "xfs_dinode.h" 36#include "xfs_dinode.h"
35#include "xfs_inode.h" 37#include "xfs_inode.h"
36#include "xfs_inode_item.h" 38#include "xfs_inode_item.h"
@@ -39,6 +41,9 @@
39#include "xfs_attr_leaf.h" 41#include "xfs_attr_leaf.h"
40#include "xfs_error.h" 42#include "xfs_error.h"
41#include "xfs_trace.h" 43#include "xfs_trace.h"
44#include "xfs_buf_item.h"
45#include "xfs_cksum.h"
46
42 47
43/* 48/*
44 * xfs_attr_leaf.c 49 * xfs_attr_leaf.c
@@ -53,85 +58,226 @@
53/* 58/*
54 * Routines used for growing the Btree. 59 * Routines used for growing the Btree.
55 */ 60 */
56STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, 61STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
57 struct xfs_buf **bpp); 62 xfs_dablk_t which_block, struct xfs_buf **bpp);
58STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer, 63STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
59 xfs_da_args_t *args, int freemap_index); 64 struct xfs_attr3_icleaf_hdr *ichdr,
60STATIC void xfs_attr_leaf_compact(struct xfs_da_args *args, 65 struct xfs_da_args *args, int freemap_index);
61 struct xfs_buf *leaf_buffer); 66STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
62STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state, 67 struct xfs_attr3_icleaf_hdr *ichdr,
68 struct xfs_buf *leaf_buffer);
69STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state,
63 xfs_da_state_blk_t *blk1, 70 xfs_da_state_blk_t *blk1,
64 xfs_da_state_blk_t *blk2); 71 xfs_da_state_blk_t *blk2);
65STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state, 72STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,
66 xfs_da_state_blk_t *leaf_blk_1, 73 xfs_da_state_blk_t *leaf_blk_1,
67 xfs_da_state_blk_t *leaf_blk_2, 74 struct xfs_attr3_icleaf_hdr *ichdr1,
68 int *number_entries_in_blk1, 75 xfs_da_state_blk_t *leaf_blk_2,
69 int *number_usedbytes_in_blk1); 76 struct xfs_attr3_icleaf_hdr *ichdr2,
77 int *number_entries_in_blk1,
78 int *number_usedbytes_in_blk1);
70 79
71/* 80/*
72 * Routines used for shrinking the Btree. 81 * Routines used for shrinking the Btree.
73 */ 82 */
74STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, 83STATIC int xfs_attr3_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
75 struct xfs_buf *bp, int level); 84 struct xfs_buf *bp, int level);
76STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, 85STATIC int xfs_attr3_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
77 struct xfs_buf *bp); 86 struct xfs_buf *bp);
78STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, 87STATIC int xfs_attr3_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
79 xfs_dablk_t blkno, int blkcnt); 88 xfs_dablk_t blkno, int blkcnt);
80 89
81/* 90/*
82 * Utility routines. 91 * Utility routines.
83 */ 92 */
84STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, 93STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf,
85 int src_start, 94 struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,
86 xfs_attr_leafblock_t *dst_leaf, 95 struct xfs_attr_leafblock *dst_leaf,
87 int dst_start, int move_count, 96 struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start,
88 xfs_mount_t *mp); 97 int move_count, struct xfs_mount *mp);
89STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); 98STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
90 99
91static void 100void
92xfs_attr_leaf_verify( 101xfs_attr3_leaf_hdr_from_disk(
102 struct xfs_attr3_icleaf_hdr *to,
103 struct xfs_attr_leafblock *from)
104{
105 int i;
106
107 ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
108 from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
109
110 if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
111 struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from;
112
113 to->forw = be32_to_cpu(hdr3->info.hdr.forw);
114 to->back = be32_to_cpu(hdr3->info.hdr.back);
115 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
116 to->count = be16_to_cpu(hdr3->count);
117 to->usedbytes = be16_to_cpu(hdr3->usedbytes);
118 to->firstused = be16_to_cpu(hdr3->firstused);
119 to->holes = hdr3->holes;
120
121 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
122 to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base);
123 to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size);
124 }
125 return;
126 }
127 to->forw = be32_to_cpu(from->hdr.info.forw);
128 to->back = be32_to_cpu(from->hdr.info.back);
129 to->magic = be16_to_cpu(from->hdr.info.magic);
130 to->count = be16_to_cpu(from->hdr.count);
131 to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
132 to->firstused = be16_to_cpu(from->hdr.firstused);
133 to->holes = from->hdr.holes;
134
135 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
136 to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base);
137 to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size);
138 }
139}
140
141void
142xfs_attr3_leaf_hdr_to_disk(
143 struct xfs_attr_leafblock *to,
144 struct xfs_attr3_icleaf_hdr *from)
145{
146 int i;
147
148 ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
149 from->magic == XFS_ATTR3_LEAF_MAGIC);
150
151 if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
152 struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to;
153
154 hdr3->info.hdr.forw = cpu_to_be32(from->forw);
155 hdr3->info.hdr.back = cpu_to_be32(from->back);
156 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
157 hdr3->count = cpu_to_be16(from->count);
158 hdr3->usedbytes = cpu_to_be16(from->usedbytes);
159 hdr3->firstused = cpu_to_be16(from->firstused);
160 hdr3->holes = from->holes;
161 hdr3->pad1 = 0;
162
163 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
164 hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base);
165 hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size);
166 }
167 return;
168 }
169 to->hdr.info.forw = cpu_to_be32(from->forw);
170 to->hdr.info.back = cpu_to_be32(from->back);
171 to->hdr.info.magic = cpu_to_be16(from->magic);
172 to->hdr.count = cpu_to_be16(from->count);
173 to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
174 to->hdr.firstused = cpu_to_be16(from->firstused);
175 to->hdr.holes = from->holes;
176 to->hdr.pad1 = 0;
177
178 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
179 to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base);
180 to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size);
181 }
182}
183
184static bool
185xfs_attr3_leaf_verify(
93 struct xfs_buf *bp) 186 struct xfs_buf *bp)
94{ 187{
95 struct xfs_mount *mp = bp->b_target->bt_mount; 188 struct xfs_mount *mp = bp->b_target->bt_mount;
96 struct xfs_attr_leaf_hdr *hdr = bp->b_addr; 189 struct xfs_attr_leafblock *leaf = bp->b_addr;
97 int block_ok = 0; 190 struct xfs_attr3_icleaf_hdr ichdr;
98 191
99 block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC); 192 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
100 if (!block_ok) { 193
101 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 194 if (xfs_sb_version_hascrc(&mp->m_sb)) {
102 xfs_buf_ioerror(bp, EFSCORRUPTED); 195 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
196
197 if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
198 return false;
199
200 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
201 return false;
202 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
203 return false;
204 } else {
205 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
206 return false;
103 } 207 }
208 if (ichdr.count == 0)
209 return false;
210
211 /* XXX: need to range check rest of attr header values */
212 /* XXX: hash order check? */
213
214 return true;
104} 215}
105 216
106static void 217static void
107xfs_attr_leaf_read_verify( 218xfs_attr3_leaf_write_verify(
108 struct xfs_buf *bp) 219 struct xfs_buf *bp)
109{ 220{
110 xfs_attr_leaf_verify(bp); 221 struct xfs_mount *mp = bp->b_target->bt_mount;
222 struct xfs_buf_log_item *bip = bp->b_fspriv;
223 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
224
225 if (!xfs_attr3_leaf_verify(bp)) {
226 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
227 xfs_buf_ioerror(bp, EFSCORRUPTED);
228 return;
229 }
230
231 if (!xfs_sb_version_hascrc(&mp->m_sb))
232 return;
233
234 if (bip)
235 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
236
237 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF);
111} 238}
112 239
240/*
241 * leaf/node format detection on trees is sketchy, so a node read can be done on
242 * leaf level blocks when detection identifies the tree as a node format tree
243 * incorrectly. In this case, we need to swap the verifier to match the correct
244 * format of the block being read.
245 */
113static void 246static void
114xfs_attr_leaf_write_verify( 247xfs_attr3_leaf_read_verify(
115 struct xfs_buf *bp) 248 struct xfs_buf *bp)
116{ 249{
117 xfs_attr_leaf_verify(bp); 250 struct xfs_mount *mp = bp->b_target->bt_mount;
251
252 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
253 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
254 XFS_ATTR3_LEAF_CRC_OFF)) ||
255 !xfs_attr3_leaf_verify(bp)) {
256 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
257 xfs_buf_ioerror(bp, EFSCORRUPTED);
258 }
118} 259}
119 260
120const struct xfs_buf_ops xfs_attr_leaf_buf_ops = { 261const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
121 .verify_read = xfs_attr_leaf_read_verify, 262 .verify_read = xfs_attr3_leaf_read_verify,
122 .verify_write = xfs_attr_leaf_write_verify, 263 .verify_write = xfs_attr3_leaf_write_verify,
123}; 264};
124 265
125int 266int
126xfs_attr_leaf_read( 267xfs_attr3_leaf_read(
127 struct xfs_trans *tp, 268 struct xfs_trans *tp,
128 struct xfs_inode *dp, 269 struct xfs_inode *dp,
129 xfs_dablk_t bno, 270 xfs_dablk_t bno,
130 xfs_daddr_t mappedbno, 271 xfs_daddr_t mappedbno,
131 struct xfs_buf **bpp) 272 struct xfs_buf **bpp)
132{ 273{
133 return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, 274 int err;
134 XFS_ATTR_FORK, &xfs_attr_leaf_buf_ops); 275
276 err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
277 XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
278 if (!err && tp)
279 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
280 return err;
135} 281}
136 282
137/*======================================================================== 283/*========================================================================
@@ -172,7 +318,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
172 int dsize; 318 int dsize;
173 xfs_mount_t *mp = dp->i_mount; 319 xfs_mount_t *mp = dp->i_mount;
174 320
175 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ 321 /* rounded down */
322 offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
176 323
177 switch (dp->i_d.di_format) { 324 switch (dp->i_d.di_format) {
178 case XFS_DINODE_FMT_DEV: 325 case XFS_DINODE_FMT_DEV:
@@ -231,7 +378,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
231 return 0; 378 return 0;
232 return dp->i_d.di_forkoff; 379 return dp->i_d.di_forkoff;
233 } 380 }
234 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); 381 dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
235 break; 382 break;
236 } 383 }
237 384
@@ -243,7 +390,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
243 minforkoff = roundup(minforkoff, 8) >> 3; 390 minforkoff = roundup(minforkoff, 8) >> 3;
244 391
245 /* attr fork btree root can have at least this many key/ptr pairs */ 392 /* attr fork btree root can have at least this many key/ptr pairs */
246 maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); 393 maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) -
394 XFS_BMDR_SPACE_CALC(MINABTPTRS);
247 maxforkoff = maxforkoff >> 3; /* rounded down */ 395 maxforkoff = maxforkoff >> 3; /* rounded down */
248 396
249 if (offset >= maxforkoff) 397 if (offset >= maxforkoff)
@@ -557,7 +705,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
557 } 705 }
558 706
559 ASSERT(blkno == 0); 707 ASSERT(blkno == 0);
560 error = xfs_attr_leaf_create(args, blkno, &bp); 708 error = xfs_attr3_leaf_create(args, blkno, &bp);
561 if (error) { 709 if (error) {
562 error = xfs_da_shrink_inode(args, 0, bp); 710 error = xfs_da_shrink_inode(args, 0, bp);
563 bp = NULL; 711 bp = NULL;
@@ -586,9 +734,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
586 nargs.hashval = xfs_da_hashname(sfe->nameval, 734 nargs.hashval = xfs_da_hashname(sfe->nameval,
587 sfe->namelen); 735 sfe->namelen);
588 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); 736 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
589 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ 737 error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
590 ASSERT(error == ENOATTR); 738 ASSERT(error == ENOATTR);
591 error = xfs_attr_leaf_add(bp, &nargs); 739 error = xfs_attr3_leaf_add(bp, &nargs);
592 ASSERT(error != ENOSPC); 740 ASSERT(error != ENOSPC);
593 if (error) 741 if (error)
594 goto out; 742 goto out;
@@ -801,7 +949,7 @@ xfs_attr_shortform_allfit(
801 continue; /* don't copy partial entries */ 949 continue; /* don't copy partial entries */
802 if (!(entry->flags & XFS_ATTR_LOCAL)) 950 if (!(entry->flags & XFS_ATTR_LOCAL))
803 return(0); 951 return(0);
804 name_loc = xfs_attr_leaf_name_local(leaf, i); 952 name_loc = xfs_attr3_leaf_name_local(leaf, i);
805 if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) 953 if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
806 return(0); 954 return(0);
807 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) 955 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
@@ -821,29 +969,34 @@ xfs_attr_shortform_allfit(
821 * Convert a leaf attribute list to shortform attribute list 969 * Convert a leaf attribute list to shortform attribute list
822 */ 970 */
823int 971int
824xfs_attr_leaf_to_shortform( 972xfs_attr3_leaf_to_shortform(
825 struct xfs_buf *bp, 973 struct xfs_buf *bp,
826 xfs_da_args_t *args, 974 struct xfs_da_args *args,
827 int forkoff) 975 int forkoff)
828{ 976{
829 xfs_attr_leafblock_t *leaf; 977 struct xfs_attr_leafblock *leaf;
830 xfs_attr_leaf_entry_t *entry; 978 struct xfs_attr3_icleaf_hdr ichdr;
831 xfs_attr_leaf_name_local_t *name_loc; 979 struct xfs_attr_leaf_entry *entry;
832 xfs_da_args_t nargs; 980 struct xfs_attr_leaf_name_local *name_loc;
833 xfs_inode_t *dp; 981 struct xfs_da_args nargs;
834 char *tmpbuffer; 982 struct xfs_inode *dp = args->dp;
835 int error, i; 983 char *tmpbuffer;
984 int error;
985 int i;
836 986
837 trace_xfs_attr_leaf_to_sf(args); 987 trace_xfs_attr_leaf_to_sf(args);
838 988
839 dp = args->dp;
840 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); 989 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
841 ASSERT(tmpbuffer != NULL); 990 if (!tmpbuffer)
991 return ENOMEM;
842 992
843 ASSERT(bp != NULL);
844 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); 993 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
994
845 leaf = (xfs_attr_leafblock_t *)tmpbuffer; 995 leaf = (xfs_attr_leafblock_t *)tmpbuffer;
846 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 996 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
997 entry = xfs_attr3_leaf_entryp(leaf);
998
999 /* XXX (dgc): buffer is about to be marked stale - why zero it? */
847 memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); 1000 memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
848 1001
849 /* 1002 /*
@@ -873,14 +1026,14 @@ xfs_attr_leaf_to_shortform(
873 nargs.whichfork = XFS_ATTR_FORK; 1026 nargs.whichfork = XFS_ATTR_FORK;
874 nargs.trans = args->trans; 1027 nargs.trans = args->trans;
875 nargs.op_flags = XFS_DA_OP_OKNOENT; 1028 nargs.op_flags = XFS_DA_OP_OKNOENT;
876 entry = &leaf->entries[0]; 1029
877 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 1030 for (i = 0; i < ichdr.count; entry++, i++) {
878 if (entry->flags & XFS_ATTR_INCOMPLETE) 1031 if (entry->flags & XFS_ATTR_INCOMPLETE)
879 continue; /* don't copy partial entries */ 1032 continue; /* don't copy partial entries */
880 if (!entry->nameidx) 1033 if (!entry->nameidx)
881 continue; 1034 continue;
882 ASSERT(entry->flags & XFS_ATTR_LOCAL); 1035 ASSERT(entry->flags & XFS_ATTR_LOCAL);
883 name_loc = xfs_attr_leaf_name_local(leaf, i); 1036 name_loc = xfs_attr3_leaf_name_local(leaf, i);
884 nargs.name = name_loc->nameval; 1037 nargs.name = name_loc->nameval;
885 nargs.namelen = name_loc->namelen; 1038 nargs.namelen = name_loc->namelen;
886 nargs.value = &name_loc->nameval[nargs.namelen]; 1039 nargs.value = &name_loc->nameval[nargs.namelen];
@@ -893,61 +1046,75 @@ xfs_attr_leaf_to_shortform(
893 1046
894out: 1047out:
895 kmem_free(tmpbuffer); 1048 kmem_free(tmpbuffer);
896 return(error); 1049 return error;
897} 1050}
898 1051
899/* 1052/*
900 * Convert from using a single leaf to a root node and a leaf. 1053 * Convert from using a single leaf to a root node and a leaf.
901 */ 1054 */
902int 1055int
903xfs_attr_leaf_to_node(xfs_da_args_t *args) 1056xfs_attr3_leaf_to_node(
1057 struct xfs_da_args *args)
904{ 1058{
905 xfs_attr_leafblock_t *leaf; 1059 struct xfs_attr_leafblock *leaf;
906 xfs_da_intnode_t *node; 1060 struct xfs_attr3_icleaf_hdr icleafhdr;
907 xfs_inode_t *dp; 1061 struct xfs_attr_leaf_entry *entries;
908 struct xfs_buf *bp1, *bp2; 1062 struct xfs_da_node_entry *btree;
909 xfs_dablk_t blkno; 1063 struct xfs_da3_icnode_hdr icnodehdr;
910 int error; 1064 struct xfs_da_intnode *node;
1065 struct xfs_inode *dp = args->dp;
1066 struct xfs_mount *mp = dp->i_mount;
1067 struct xfs_buf *bp1 = NULL;
1068 struct xfs_buf *bp2 = NULL;
1069 xfs_dablk_t blkno;
1070 int error;
911 1071
912 trace_xfs_attr_leaf_to_node(args); 1072 trace_xfs_attr_leaf_to_node(args);
913 1073
914 dp = args->dp;
915 bp1 = bp2 = NULL;
916 error = xfs_da_grow_inode(args, &blkno); 1074 error = xfs_da_grow_inode(args, &blkno);
917 if (error) 1075 if (error)
918 goto out; 1076 goto out;
919 error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1); 1077 error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1);
920 if (error) 1078 if (error)
921 goto out; 1079 goto out;
922 1080
923 bp2 = NULL; 1081 error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK);
924 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2,
925 XFS_ATTR_FORK);
926 if (error) 1082 if (error)
927 goto out; 1083 goto out;
1084
1085 /* copy leaf to new buffer, update identifiers */
1086 xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
928 bp2->b_ops = bp1->b_ops; 1087 bp2->b_ops = bp1->b_ops;
929 memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); 1088 memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp));
930 bp1 = NULL; 1089 if (xfs_sb_version_hascrc(&mp->m_sb)) {
931 xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); 1090 struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
1091 hdr3->blkno = cpu_to_be64(bp2->b_bn);
1092 }
1093 xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1);
932 1094
933 /* 1095 /*
934 * Set up the new root node. 1096 * Set up the new root node.
935 */ 1097 */
936 error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); 1098 error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
937 if (error) 1099 if (error)
938 goto out; 1100 goto out;
939 node = bp1->b_addr; 1101 node = bp1->b_addr;
1102 xfs_da3_node_hdr_from_disk(&icnodehdr, node);
1103 btree = xfs_da3_node_tree_p(node);
1104
940 leaf = bp2->b_addr; 1105 leaf = bp2->b_addr;
941 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1106 xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
1107 entries = xfs_attr3_leaf_entryp(leaf);
1108
942 /* both on-disk, don't endian-flip twice */ 1109 /* both on-disk, don't endian-flip twice */
943 node->btree[0].hashval = 1110 btree[0].hashval = entries[icleafhdr.count - 1].hashval;
944 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; 1111 btree[0].before = cpu_to_be32(blkno);
945 node->btree[0].before = cpu_to_be32(blkno); 1112 icnodehdr.count = 1;
946 node->hdr.count = cpu_to_be16(1); 1113 xfs_da3_node_hdr_to_disk(node, &icnodehdr);
947 xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); 1114 xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1);
948 error = 0; 1115 error = 0;
949out: 1116out:
950 return(error); 1117 return error;
951} 1118}
952 1119
953 1120
@@ -960,52 +1127,63 @@ out:
960 * or a leaf in a node attribute list. 1127 * or a leaf in a node attribute list.
961 */ 1128 */
962STATIC int 1129STATIC int
963xfs_attr_leaf_create( 1130xfs_attr3_leaf_create(
964 xfs_da_args_t *args, 1131 struct xfs_da_args *args,
965 xfs_dablk_t blkno, 1132 xfs_dablk_t blkno,
966 struct xfs_buf **bpp) 1133 struct xfs_buf **bpp)
967{ 1134{
968 xfs_attr_leafblock_t *leaf; 1135 struct xfs_attr_leafblock *leaf;
969 xfs_attr_leaf_hdr_t *hdr; 1136 struct xfs_attr3_icleaf_hdr ichdr;
970 xfs_inode_t *dp; 1137 struct xfs_inode *dp = args->dp;
971 struct xfs_buf *bp; 1138 struct xfs_mount *mp = dp->i_mount;
972 int error; 1139 struct xfs_buf *bp;
1140 int error;
973 1141
974 trace_xfs_attr_leaf_create(args); 1142 trace_xfs_attr_leaf_create(args);
975 1143
976 dp = args->dp;
977 ASSERT(dp != NULL);
978 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, 1144 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
979 XFS_ATTR_FORK); 1145 XFS_ATTR_FORK);
980 if (error) 1146 if (error)
981 return(error); 1147 return error;
982 bp->b_ops = &xfs_attr_leaf_buf_ops; 1148 bp->b_ops = &xfs_attr3_leaf_buf_ops;
1149 xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);
983 leaf = bp->b_addr; 1150 leaf = bp->b_addr;
984 memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); 1151 memset(leaf, 0, XFS_LBSIZE(mp));
985 hdr = &leaf->hdr;
986 hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
987 hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount));
988 if (!hdr->firstused) {
989 hdr->firstused = cpu_to_be16(
990 XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN);
991 }
992 1152
993 hdr->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); 1153 memset(&ichdr, 0, sizeof(ichdr));
994 hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) - 1154 ichdr.firstused = XFS_LBSIZE(mp);
995 sizeof(xfs_attr_leaf_hdr_t));
996 1155
997 xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); 1156 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1157 struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
1158
1159 ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
1160
1161 hdr3->blkno = cpu_to_be64(bp->b_bn);
1162 hdr3->owner = cpu_to_be64(dp->i_ino);
1163 uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
1164
1165 ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
1166 } else {
1167 ichdr.magic = XFS_ATTR_LEAF_MAGIC;
1168 ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr);
1169 }
1170 ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
1171
1172 xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
1173 xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1);
998 1174
999 *bpp = bp; 1175 *bpp = bp;
1000 return(0); 1176 return 0;
1001} 1177}
1002 1178
1003/* 1179/*
1004 * Split the leaf node, rebalance, then add the new entry. 1180 * Split the leaf node, rebalance, then add the new entry.
1005 */ 1181 */
1006int 1182int
1007xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, 1183xfs_attr3_leaf_split(
1008 xfs_da_state_blk_t *newblk) 1184 struct xfs_da_state *state,
1185 struct xfs_da_state_blk *oldblk,
1186 struct xfs_da_state_blk *newblk)
1009{ 1187{
1010 xfs_dablk_t blkno; 1188 xfs_dablk_t blkno;
1011 int error; 1189 int error;
@@ -1019,7 +1197,7 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1019 error = xfs_da_grow_inode(state->args, &blkno); 1197 error = xfs_da_grow_inode(state->args, &blkno);
1020 if (error) 1198 if (error)
1021 return(error); 1199 return(error);
1022 error = xfs_attr_leaf_create(state->args, blkno, &newblk->bp); 1200 error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
1023 if (error) 1201 if (error)
1024 return(error); 1202 return(error);
1025 newblk->blkno = blkno; 1203 newblk->blkno = blkno;
@@ -1029,8 +1207,8 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1029 * Rebalance the entries across the two leaves. 1207 * Rebalance the entries across the two leaves.
1030 * NOTE: rebalance() currently depends on the 2nd block being empty. 1208 * NOTE: rebalance() currently depends on the 2nd block being empty.
1031 */ 1209 */
1032 xfs_attr_leaf_rebalance(state, oldblk, newblk); 1210 xfs_attr3_leaf_rebalance(state, oldblk, newblk);
1033 error = xfs_da_blk_link(state, oldblk, newblk); 1211 error = xfs_da3_blk_link(state, oldblk, newblk);
1034 if (error) 1212 if (error)
1035 return(error); 1213 return(error);
1036 1214
@@ -1043,10 +1221,10 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1043 */ 1221 */
1044 if (state->inleaf) { 1222 if (state->inleaf) {
1045 trace_xfs_attr_leaf_add_old(state->args); 1223 trace_xfs_attr_leaf_add_old(state->args);
1046 error = xfs_attr_leaf_add(oldblk->bp, state->args); 1224 error = xfs_attr3_leaf_add(oldblk->bp, state->args);
1047 } else { 1225 } else {
1048 trace_xfs_attr_leaf_add_new(state->args); 1226 trace_xfs_attr_leaf_add_new(state->args);
1049 error = xfs_attr_leaf_add(newblk->bp, state->args); 1227 error = xfs_attr3_leaf_add(newblk->bp, state->args);
1050 } 1228 }
1051 1229
1052 /* 1230 /*
@@ -1061,22 +1239,23 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1061 * Add a name to the leaf attribute list structure. 1239 * Add a name to the leaf attribute list structure.
1062 */ 1240 */
1063int 1241int
1064xfs_attr_leaf_add( 1242xfs_attr3_leaf_add(
1065 struct xfs_buf *bp, 1243 struct xfs_buf *bp,
1066 struct xfs_da_args *args) 1244 struct xfs_da_args *args)
1067{ 1245{
1068 xfs_attr_leafblock_t *leaf; 1246 struct xfs_attr_leafblock *leaf;
1069 xfs_attr_leaf_hdr_t *hdr; 1247 struct xfs_attr3_icleaf_hdr ichdr;
1070 xfs_attr_leaf_map_t *map; 1248 int tablesize;
1071 int tablesize, entsize, sum, tmp, i; 1249 int entsize;
1250 int sum;
1251 int tmp;
1252 int i;
1072 1253
1073 trace_xfs_attr_leaf_add(args); 1254 trace_xfs_attr_leaf_add(args);
1074 1255
1075 leaf = bp->b_addr; 1256 leaf = bp->b_addr;
1076 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1257 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
1077 ASSERT((args->index >= 0) 1258 ASSERT(args->index >= 0 && args->index <= ichdr.count);
1078 && (args->index <= be16_to_cpu(leaf->hdr.count)));
1079 hdr = &leaf->hdr;
1080 entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen, 1259 entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
1081 args->trans->t_mountp->m_sb.sb_blocksize, NULL); 1260 args->trans->t_mountp->m_sb.sb_blocksize, NULL);
1082 1261
@@ -1084,25 +1263,23 @@ xfs_attr_leaf_add(
1084 * Search through freemap for first-fit on new name length. 1263 * Search through freemap for first-fit on new name length.
1085 * (may need to figure in size of entry struct too) 1264 * (may need to figure in size of entry struct too)
1086 */ 1265 */
1087 tablesize = (be16_to_cpu(hdr->count) + 1) 1266 tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
1088 * sizeof(xfs_attr_leaf_entry_t) 1267 + xfs_attr3_leaf_hdr_size(leaf);
1089 + sizeof(xfs_attr_leaf_hdr_t); 1268 for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) {
1090 map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1]; 1269 if (tablesize > ichdr.firstused) {
1091 for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) { 1270 sum += ichdr.freemap[i].size;
1092 if (tablesize > be16_to_cpu(hdr->firstused)) {
1093 sum += be16_to_cpu(map->size);
1094 continue; 1271 continue;
1095 } 1272 }
1096 if (!map->size) 1273 if (!ichdr.freemap[i].size)
1097 continue; /* no space in this map */ 1274 continue; /* no space in this map */
1098 tmp = entsize; 1275 tmp = entsize;
1099 if (be16_to_cpu(map->base) < be16_to_cpu(hdr->firstused)) 1276 if (ichdr.freemap[i].base < ichdr.firstused)
1100 tmp += sizeof(xfs_attr_leaf_entry_t); 1277 tmp += sizeof(xfs_attr_leaf_entry_t);
1101 if (be16_to_cpu(map->size) >= tmp) { 1278 if (ichdr.freemap[i].size >= tmp) {
1102 tmp = xfs_attr_leaf_add_work(bp, args, i); 1279 tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
1103 return(tmp); 1280 goto out_log_hdr;
1104 } 1281 }
1105 sum += be16_to_cpu(map->size); 1282 sum += ichdr.freemap[i].size;
1106 } 1283 }
1107 1284
1108 /* 1285 /*
@@ -1110,82 +1287,89 @@ xfs_attr_leaf_add(
1110 * and we don't have enough freespace, then compaction will do us 1287 * and we don't have enough freespace, then compaction will do us
1111 * no good and we should just give up. 1288 * no good and we should just give up.
1112 */ 1289 */
1113 if (!hdr->holes && (sum < entsize)) 1290 if (!ichdr.holes && sum < entsize)
1114 return(XFS_ERROR(ENOSPC)); 1291 return XFS_ERROR(ENOSPC);
1115 1292
1116 /* 1293 /*
1117 * Compact the entries to coalesce free space. 1294 * Compact the entries to coalesce free space.
1118 * This may change the hdr->count via dropping INCOMPLETE entries. 1295 * This may change the hdr->count via dropping INCOMPLETE entries.
1119 */ 1296 */
1120 xfs_attr_leaf_compact(args, bp); 1297 xfs_attr3_leaf_compact(args, &ichdr, bp);
1121 1298
1122 /* 1299 /*
1123 * After compaction, the block is guaranteed to have only one 1300 * After compaction, the block is guaranteed to have only one
1124 * free region, in freemap[0]. If it is not big enough, give up. 1301 * free region, in freemap[0]. If it is not big enough, give up.
1125 */ 1302 */
1126 if (be16_to_cpu(hdr->freemap[0].size) 1303 if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
1127 < (entsize + sizeof(xfs_attr_leaf_entry_t))) 1304 tmp = ENOSPC;
1128 return(XFS_ERROR(ENOSPC)); 1305 goto out_log_hdr;
1306 }
1307
1308 tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
1129 1309
1130 return(xfs_attr_leaf_add_work(bp, args, 0)); 1310out_log_hdr:
1311 xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
1312 xfs_trans_log_buf(args->trans, bp,
1313 XFS_DA_LOGRANGE(leaf, &leaf->hdr,
1314 xfs_attr3_leaf_hdr_size(leaf)));
1315 return tmp;
1131} 1316}
1132 1317
1133/* 1318/*
1134 * Add a name to a leaf attribute list structure. 1319 * Add a name to a leaf attribute list structure.
1135 */ 1320 */
1136STATIC int 1321STATIC int
1137xfs_attr_leaf_add_work( 1322xfs_attr3_leaf_add_work(
1138 struct xfs_buf *bp, 1323 struct xfs_buf *bp,
1139 xfs_da_args_t *args, 1324 struct xfs_attr3_icleaf_hdr *ichdr,
1140 int mapindex) 1325 struct xfs_da_args *args,
1326 int mapindex)
1141{ 1327{
1142 xfs_attr_leafblock_t *leaf; 1328 struct xfs_attr_leafblock *leaf;
1143 xfs_attr_leaf_hdr_t *hdr; 1329 struct xfs_attr_leaf_entry *entry;
1144 xfs_attr_leaf_entry_t *entry; 1330 struct xfs_attr_leaf_name_local *name_loc;
1145 xfs_attr_leaf_name_local_t *name_loc; 1331 struct xfs_attr_leaf_name_remote *name_rmt;
1146 xfs_attr_leaf_name_remote_t *name_rmt; 1332 struct xfs_mount *mp;
1147 xfs_attr_leaf_map_t *map; 1333 int tmp;
1148 xfs_mount_t *mp; 1334 int i;
1149 int tmp, i;
1150 1335
1151 trace_xfs_attr_leaf_add_work(args); 1336 trace_xfs_attr_leaf_add_work(args);
1152 1337
1153 leaf = bp->b_addr; 1338 leaf = bp->b_addr;
1154 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1339 ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE);
1155 hdr = &leaf->hdr; 1340 ASSERT(args->index >= 0 && args->index <= ichdr->count);
1156 ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
1157 ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
1158 1341
1159 /* 1342 /*
1160 * Force open some space in the entry array and fill it in. 1343 * Force open some space in the entry array and fill it in.
1161 */ 1344 */
1162 entry = &leaf->entries[args->index]; 1345 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
1163 if (args->index < be16_to_cpu(hdr->count)) { 1346 if (args->index < ichdr->count) {
1164 tmp = be16_to_cpu(hdr->count) - args->index; 1347 tmp = ichdr->count - args->index;
1165 tmp *= sizeof(xfs_attr_leaf_entry_t); 1348 tmp *= sizeof(xfs_attr_leaf_entry_t);
1166 memmove((char *)(entry+1), (char *)entry, tmp); 1349 memmove(entry + 1, entry, tmp);
1167 xfs_trans_log_buf(args->trans, bp, 1350 xfs_trans_log_buf(args->trans, bp,
1168 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); 1351 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
1169 } 1352 }
1170 be16_add_cpu(&hdr->count, 1); 1353 ichdr->count++;
1171 1354
1172 /* 1355 /*
1173 * Allocate space for the new string (at the end of the run). 1356 * Allocate space for the new string (at the end of the run).
1174 */ 1357 */
1175 map = &hdr->freemap[mapindex];
1176 mp = args->trans->t_mountp; 1358 mp = args->trans->t_mountp;
1177 ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); 1359 ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp));
1178 ASSERT((be16_to_cpu(map->base) & 0x3) == 0); 1360 ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
1179 ASSERT(be16_to_cpu(map->size) >= 1361 ASSERT(ichdr->freemap[mapindex].size >=
1180 xfs_attr_leaf_newentsize(args->namelen, args->valuelen, 1362 xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
1181 mp->m_sb.sb_blocksize, NULL)); 1363 mp->m_sb.sb_blocksize, NULL));
1182 ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); 1364 ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp));
1183 ASSERT((be16_to_cpu(map->size) & 0x3) == 0); 1365 ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
1184 be16_add_cpu(&map->size, 1366
1185 -xfs_attr_leaf_newentsize(args->namelen, args->valuelen, 1367 ichdr->freemap[mapindex].size -=
1186 mp->m_sb.sb_blocksize, &tmp)); 1368 xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
1187 entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) + 1369 mp->m_sb.sb_blocksize, &tmp);
1188 be16_to_cpu(map->size)); 1370
1371 entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +
1372 ichdr->freemap[mapindex].size);
1189 entry->hashval = cpu_to_be32(args->hashval); 1373 entry->hashval = cpu_to_be32(args->hashval);
1190 entry->flags = tmp ? XFS_ATTR_LOCAL : 0; 1374 entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
1191 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); 1375 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
@@ -1200,7 +1384,7 @@ xfs_attr_leaf_add_work(
1200 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); 1384 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
1201 ASSERT((args->index == 0) || 1385 ASSERT((args->index == 0) ||
1202 (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); 1386 (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
1203 ASSERT((args->index == be16_to_cpu(hdr->count)-1) || 1387 ASSERT((args->index == ichdr->count - 1) ||
1204 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); 1388 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
1205 1389
1206 /* 1390 /*
@@ -1211,14 +1395,14 @@ xfs_attr_leaf_add_work(
1211 * as part of this transaction (a split operation for example). 1395 * as part of this transaction (a split operation for example).
1212 */ 1396 */
1213 if (entry->flags & XFS_ATTR_LOCAL) { 1397 if (entry->flags & XFS_ATTR_LOCAL) {
1214 name_loc = xfs_attr_leaf_name_local(leaf, args->index); 1398 name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
1215 name_loc->namelen = args->namelen; 1399 name_loc->namelen = args->namelen;
1216 name_loc->valuelen = cpu_to_be16(args->valuelen); 1400 name_loc->valuelen = cpu_to_be16(args->valuelen);
1217 memcpy((char *)name_loc->nameval, args->name, args->namelen); 1401 memcpy((char *)name_loc->nameval, args->name, args->namelen);
1218 memcpy((char *)&name_loc->nameval[args->namelen], args->value, 1402 memcpy((char *)&name_loc->nameval[args->namelen], args->value,
1219 be16_to_cpu(name_loc->valuelen)); 1403 be16_to_cpu(name_loc->valuelen));
1220 } else { 1404 } else {
1221 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 1405 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
1222 name_rmt->namelen = args->namelen; 1406 name_rmt->namelen = args->namelen;
1223 memcpy((char *)name_rmt->name, args->name, args->namelen); 1407 memcpy((char *)name_rmt->name, args->name, args->namelen);
1224 entry->flags |= XFS_ATTR_INCOMPLETE; 1408 entry->flags |= XFS_ATTR_INCOMPLETE;
@@ -1229,44 +1413,41 @@ xfs_attr_leaf_add_work(
1229 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); 1413 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
1230 } 1414 }
1231 xfs_trans_log_buf(args->trans, bp, 1415 xfs_trans_log_buf(args->trans, bp,
1232 XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), 1416 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
1233 xfs_attr_leaf_entsize(leaf, args->index))); 1417 xfs_attr_leaf_entsize(leaf, args->index)));
1234 1418
1235 /* 1419 /*
1236 * Update the control info for this leaf node 1420 * Update the control info for this leaf node
1237 */ 1421 */
1238 if (be16_to_cpu(entry->nameidx) < be16_to_cpu(hdr->firstused)) { 1422 if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
1239 /* both on-disk, don't endian-flip twice */ 1423 ichdr->firstused = be16_to_cpu(entry->nameidx);
1240 hdr->firstused = entry->nameidx; 1424
1241 } 1425 ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t)
1242 ASSERT(be16_to_cpu(hdr->firstused) >= 1426 + xfs_attr3_leaf_hdr_size(leaf));
1243 ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); 1427 tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t)
1244 tmp = (be16_to_cpu(hdr->count)-1) * sizeof(xfs_attr_leaf_entry_t) 1428 + xfs_attr3_leaf_hdr_size(leaf);
1245 + sizeof(xfs_attr_leaf_hdr_t); 1429
1246 map = &hdr->freemap[0]; 1430 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
1247 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { 1431 if (ichdr->freemap[i].base == tmp) {
1248 if (be16_to_cpu(map->base) == tmp) { 1432 ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
1249 be16_add_cpu(&map->base, sizeof(xfs_attr_leaf_entry_t)); 1433 ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
1250 be16_add_cpu(&map->size,
1251 -((int)sizeof(xfs_attr_leaf_entry_t)));
1252 } 1434 }
1253 } 1435 }
1254 be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); 1436 ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
1255 xfs_trans_log_buf(args->trans, bp, 1437 return 0;
1256 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1257 return(0);
1258} 1438}
1259 1439
1260/* 1440/*
1261 * Garbage collect a leaf attribute list block by copying it to a new buffer. 1441 * Garbage collect a leaf attribute list block by copying it to a new buffer.
1262 */ 1442 */
1263STATIC void 1443STATIC void
1264xfs_attr_leaf_compact( 1444xfs_attr3_leaf_compact(
1265 struct xfs_da_args *args, 1445 struct xfs_da_args *args,
1446 struct xfs_attr3_icleaf_hdr *ichdr_d,
1266 struct xfs_buf *bp) 1447 struct xfs_buf *bp)
1267{ 1448{
1268 xfs_attr_leafblock_t *leaf_s, *leaf_d; 1449 xfs_attr_leafblock_t *leaf_s, *leaf_d;
1269 xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; 1450 struct xfs_attr3_icleaf_hdr ichdr_s;
1270 struct xfs_trans *trans = args->trans; 1451 struct xfs_trans *trans = args->trans;
1271 struct xfs_mount *mp = trans->t_mountp; 1452 struct xfs_mount *mp = trans->t_mountp;
1272 char *tmpbuffer; 1453 char *tmpbuffer;
@@ -1283,34 +1464,69 @@ xfs_attr_leaf_compact(
1283 */ 1464 */
1284 leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; 1465 leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
1285 leaf_d = bp->b_addr; 1466 leaf_d = bp->b_addr;
1286 hdr_s = &leaf_s->hdr; 1467 ichdr_s = *ichdr_d; /* struct copy */
1287 hdr_d = &leaf_d->hdr; 1468 ichdr_d->firstused = XFS_LBSIZE(mp);
1288 hdr_d->info = hdr_s->info; /* struct copy */ 1469 ichdr_d->usedbytes = 0;
1289 hdr_d->firstused = cpu_to_be16(XFS_LBSIZE(mp)); 1470 ichdr_d->count = 0;
1290 /* handle truncation gracefully */ 1471 ichdr_d->holes = 0;
1291 if (!hdr_d->firstused) { 1472 ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s);
1292 hdr_d->firstused = cpu_to_be16( 1473 ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
1293 XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN);
1294 }
1295 hdr_d->usedbytes = 0;
1296 hdr_d->count = 0;
1297 hdr_d->holes = 0;
1298 hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t));
1299 hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) -
1300 sizeof(xfs_attr_leaf_hdr_t));
1301 1474
1302 /* 1475 /*
1303 * Copy all entry's in the same (sorted) order, 1476 * Copy all entry's in the same (sorted) order,
1304 * but allocate name/value pairs packed and in sequence. 1477 * but allocate name/value pairs packed and in sequence.
1305 */ 1478 */
1306 xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0, 1479 xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0,
1307 be16_to_cpu(hdr_s->count), mp); 1480 ichdr_s.count, mp);
1481 /*
1482 * this logs the entire buffer, but the caller must write the header
1483 * back to the buffer when it is finished modifying it.
1484 */
1308 xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); 1485 xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
1309 1486
1310 kmem_free(tmpbuffer); 1487 kmem_free(tmpbuffer);
1311} 1488}
1312 1489
1313/* 1490/*
1491 * Compare two leaf blocks "order".
1492 * Return 0 unless leaf2 should go before leaf1.
1493 */
1494static int
1495xfs_attr3_leaf_order(
1496 struct xfs_buf *leaf1_bp,
1497 struct xfs_attr3_icleaf_hdr *leaf1hdr,
1498 struct xfs_buf *leaf2_bp,
1499 struct xfs_attr3_icleaf_hdr *leaf2hdr)
1500{
1501 struct xfs_attr_leaf_entry *entries1;
1502 struct xfs_attr_leaf_entry *entries2;
1503
1504 entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr);
1505 entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr);
1506 if (leaf1hdr->count > 0 && leaf2hdr->count > 0 &&
1507 ((be32_to_cpu(entries2[0].hashval) <
1508 be32_to_cpu(entries1[0].hashval)) ||
1509 (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) <
1510 be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) {
1511 return 1;
1512 }
1513 return 0;
1514}
1515
1516int
1517xfs_attr_leaf_order(
1518 struct xfs_buf *leaf1_bp,
1519 struct xfs_buf *leaf2_bp)
1520{
1521 struct xfs_attr3_icleaf_hdr ichdr1;
1522 struct xfs_attr3_icleaf_hdr ichdr2;
1523
1524 xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
1525 xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
1526 return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
1527}
1528
1529/*
1314 * Redistribute the attribute list entries between two leaf nodes, 1530 * Redistribute the attribute list entries between two leaf nodes,
1315 * taking into account the size of the new entry. 1531 * taking into account the size of the new entry.
1316 * 1532 *
@@ -1323,14 +1539,23 @@ xfs_attr_leaf_compact(
1323 * the "new" and "old" values can end up in different blocks. 1539 * the "new" and "old" values can end up in different blocks.
1324 */ 1540 */
1325STATIC void 1541STATIC void
1326xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, 1542xfs_attr3_leaf_rebalance(
1327 xfs_da_state_blk_t *blk2) 1543 struct xfs_da_state *state,
1544 struct xfs_da_state_blk *blk1,
1545 struct xfs_da_state_blk *blk2)
1328{ 1546{
1329 xfs_da_args_t *args; 1547 struct xfs_da_args *args;
1330 xfs_da_state_blk_t *tmp_blk; 1548 struct xfs_attr_leafblock *leaf1;
1331 xfs_attr_leafblock_t *leaf1, *leaf2; 1549 struct xfs_attr_leafblock *leaf2;
1332 xfs_attr_leaf_hdr_t *hdr1, *hdr2; 1550 struct xfs_attr3_icleaf_hdr ichdr1;
1333 int count, totallen, max, space, swap; 1551 struct xfs_attr3_icleaf_hdr ichdr2;
1552 struct xfs_attr_leaf_entry *entries1;
1553 struct xfs_attr_leaf_entry *entries2;
1554 int count;
1555 int totallen;
1556 int max;
1557 int space;
1558 int swap;
1334 1559
1335 /* 1560 /*
1336 * Set up environment. 1561 * Set up environment.
@@ -1339,9 +1564,9 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1339 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); 1564 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
1340 leaf1 = blk1->bp->b_addr; 1565 leaf1 = blk1->bp->b_addr;
1341 leaf2 = blk2->bp->b_addr; 1566 leaf2 = blk2->bp->b_addr;
1342 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1567 xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
1343 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1568 xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
1344 ASSERT(leaf2->hdr.count == 0); 1569 ASSERT(ichdr2.count == 0);
1345 args = state->args; 1570 args = state->args;
1346 1571
1347 trace_xfs_attr_leaf_rebalance(args); 1572 trace_xfs_attr_leaf_rebalance(args);
@@ -1353,16 +1578,23 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1353 * second block, this code should never set "swap". 1578 * second block, this code should never set "swap".
1354 */ 1579 */
1355 swap = 0; 1580 swap = 0;
1356 if (xfs_attr_leaf_order(blk1->bp, blk2->bp)) { 1581 if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) {
1582 struct xfs_da_state_blk *tmp_blk;
1583 struct xfs_attr3_icleaf_hdr tmp_ichdr;
1584
1357 tmp_blk = blk1; 1585 tmp_blk = blk1;
1358 blk1 = blk2; 1586 blk1 = blk2;
1359 blk2 = tmp_blk; 1587 blk2 = tmp_blk;
1588
1589 /* struct copies to swap them rather than reconverting */
1590 tmp_ichdr = ichdr1;
1591 ichdr1 = ichdr2;
1592 ichdr2 = tmp_ichdr;
1593
1360 leaf1 = blk1->bp->b_addr; 1594 leaf1 = blk1->bp->b_addr;
1361 leaf2 = blk2->bp->b_addr; 1595 leaf2 = blk2->bp->b_addr;
1362 swap = 1; 1596 swap = 1;
1363 } 1597 }
1364 hdr1 = &leaf1->hdr;
1365 hdr2 = &leaf2->hdr;
1366 1598
1367 /* 1599 /*
1368 * Examine entries until we reduce the absolute difference in 1600 * Examine entries until we reduce the absolute difference in
@@ -1372,41 +1604,39 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1372 * "inleaf" is true if the new entry should be inserted into blk1. 1604 * "inleaf" is true if the new entry should be inserted into blk1.
1373 * If "swap" is also true, then reverse the sense of "inleaf". 1605 * If "swap" is also true, then reverse the sense of "inleaf".
1374 */ 1606 */
1375 state->inleaf = xfs_attr_leaf_figure_balance(state, blk1, blk2, 1607 state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1,
1376 &count, &totallen); 1608 blk2, &ichdr2,
1609 &count, &totallen);
1377 if (swap) 1610 if (swap)
1378 state->inleaf = !state->inleaf; 1611 state->inleaf = !state->inleaf;
1379 1612
1380 /* 1613 /*
1381 * Move any entries required from leaf to leaf: 1614 * Move any entries required from leaf to leaf:
1382 */ 1615 */
1383 if (count < be16_to_cpu(hdr1->count)) { 1616 if (count < ichdr1.count) {
1384 /* 1617 /*
1385 * Figure the total bytes to be added to the destination leaf. 1618 * Figure the total bytes to be added to the destination leaf.
1386 */ 1619 */
1387 /* number entries being moved */ 1620 /* number entries being moved */
1388 count = be16_to_cpu(hdr1->count) - count; 1621 count = ichdr1.count - count;
1389 space = be16_to_cpu(hdr1->usedbytes) - totallen; 1622 space = ichdr1.usedbytes - totallen;
1390 space += count * sizeof(xfs_attr_leaf_entry_t); 1623 space += count * sizeof(xfs_attr_leaf_entry_t);
1391 1624
1392 /* 1625 /*
1393 * leaf2 is the destination, compact it if it looks tight. 1626 * leaf2 is the destination, compact it if it looks tight.
1394 */ 1627 */
1395 max = be16_to_cpu(hdr2->firstused) 1628 max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
1396 - sizeof(xfs_attr_leaf_hdr_t); 1629 max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t);
1397 max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t);
1398 if (space > max) 1630 if (space > max)
1399 xfs_attr_leaf_compact(args, blk2->bp); 1631 xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
1400 1632
1401 /* 1633 /*
1402 * Move high entries from leaf1 to low end of leaf2. 1634 * Move high entries from leaf1 to low end of leaf2.
1403 */ 1635 */
1404 xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count, 1636 xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count,
1405 leaf2, 0, count, state->mp); 1637 leaf2, &ichdr2, 0, count, state->mp);
1406 1638
1407 xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); 1639 } else if (count > ichdr1.count) {
1408 xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
1409 } else if (count > be16_to_cpu(hdr1->count)) {
1410 /* 1640 /*
1411 * I assert that since all callers pass in an empty 1641 * I assert that since all callers pass in an empty
1412 * second buffer, this code should never execute. 1642 * second buffer, this code should never execute.
@@ -1417,36 +1647,37 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1417 * Figure the total bytes to be added to the destination leaf. 1647 * Figure the total bytes to be added to the destination leaf.
1418 */ 1648 */
1419 /* number entries being moved */ 1649 /* number entries being moved */
1420 count -= be16_to_cpu(hdr1->count); 1650 count -= ichdr1.count;
1421 space = totallen - be16_to_cpu(hdr1->usedbytes); 1651 space = totallen - ichdr1.usedbytes;
1422 space += count * sizeof(xfs_attr_leaf_entry_t); 1652 space += count * sizeof(xfs_attr_leaf_entry_t);
1423 1653
1424 /* 1654 /*
1425 * leaf1 is the destination, compact it if it looks tight. 1655 * leaf1 is the destination, compact it if it looks tight.
1426 */ 1656 */
1427 max = be16_to_cpu(hdr1->firstused) 1657 max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
1428 - sizeof(xfs_attr_leaf_hdr_t); 1658 max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t);
1429 max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t);
1430 if (space > max) 1659 if (space > max)
1431 xfs_attr_leaf_compact(args, blk1->bp); 1660 xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
1432 1661
1433 /* 1662 /*
1434 * Move low entries from leaf2 to high end of leaf1. 1663 * Move low entries from leaf2 to high end of leaf1.
1435 */ 1664 */
1436 xfs_attr_leaf_moveents(leaf2, 0, leaf1, 1665 xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1,
1437 be16_to_cpu(hdr1->count), count, state->mp); 1666 ichdr1.count, count, state->mp);
1438
1439 xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
1440 xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
1441 } 1667 }
1442 1668
1669 xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
1670 xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
1671 xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
1672 xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
1673
1443 /* 1674 /*
1444 * Copy out last hashval in each block for B-tree code. 1675 * Copy out last hashval in each block for B-tree code.
1445 */ 1676 */
1446 blk1->hashval = be32_to_cpu( 1677 entries1 = xfs_attr3_leaf_entryp(leaf1);
1447 leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval); 1678 entries2 = xfs_attr3_leaf_entryp(leaf2);
1448 blk2->hashval = be32_to_cpu( 1679 blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval);
1449 leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval); 1680 blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval);
1450 1681
1451 /* 1682 /*
1452 * Adjust the expected index for insertion. 1683 * Adjust the expected index for insertion.
@@ -1460,12 +1691,12 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1460 * inserting. The index/blkno fields refer to the "old" entry, 1691 * inserting. The index/blkno fields refer to the "old" entry,
1461 * while the index2/blkno2 fields refer to the "new" entry. 1692 * while the index2/blkno2 fields refer to the "new" entry.
1462 */ 1693 */
1463 if (blk1->index > be16_to_cpu(leaf1->hdr.count)) { 1694 if (blk1->index > ichdr1.count) {
1464 ASSERT(state->inleaf == 0); 1695 ASSERT(state->inleaf == 0);
1465 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 1696 blk2->index = blk1->index - ichdr1.count;
1466 args->index = args->index2 = blk2->index; 1697 args->index = args->index2 = blk2->index;
1467 args->blkno = args->blkno2 = blk2->blkno; 1698 args->blkno = args->blkno2 = blk2->blkno;
1468 } else if (blk1->index == be16_to_cpu(leaf1->hdr.count)) { 1699 } else if (blk1->index == ichdr1.count) {
1469 if (state->inleaf) { 1700 if (state->inleaf) {
1470 args->index = blk1->index; 1701 args->index = blk1->index;
1471 args->blkno = blk1->blkno; 1702 args->blkno = blk1->blkno;
@@ -1477,8 +1708,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1477 * is already stored in blkno2/index2, so don't 1708 * is already stored in blkno2/index2, so don't
1478 * overwrite it overwise we corrupt the tree. 1709 * overwrite it overwise we corrupt the tree.
1479 */ 1710 */
1480 blk2->index = blk1->index 1711 blk2->index = blk1->index - ichdr1.count;
1481 - be16_to_cpu(leaf1->hdr.count);
1482 args->index = blk2->index; 1712 args->index = blk2->index;
1483 args->blkno = blk2->blkno; 1713 args->blkno = blk2->blkno;
1484 if (!state->extravalid) { 1714 if (!state->extravalid) {
@@ -1506,42 +1736,40 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1506 * GROT: Do a double-split for this case? 1736 * GROT: Do a double-split for this case?
1507 */ 1737 */
1508STATIC int 1738STATIC int
1509xfs_attr_leaf_figure_balance(xfs_da_state_t *state, 1739xfs_attr3_leaf_figure_balance(
1510 xfs_da_state_blk_t *blk1, 1740 struct xfs_da_state *state,
1511 xfs_da_state_blk_t *blk2, 1741 struct xfs_da_state_blk *blk1,
1512 int *countarg, int *usedbytesarg) 1742 struct xfs_attr3_icleaf_hdr *ichdr1,
1743 struct xfs_da_state_blk *blk2,
1744 struct xfs_attr3_icleaf_hdr *ichdr2,
1745 int *countarg,
1746 int *usedbytesarg)
1513{ 1747{
1514 xfs_attr_leafblock_t *leaf1, *leaf2; 1748 struct xfs_attr_leafblock *leaf1 = blk1->bp->b_addr;
1515 xfs_attr_leaf_hdr_t *hdr1, *hdr2; 1749 struct xfs_attr_leafblock *leaf2 = blk2->bp->b_addr;
1516 xfs_attr_leaf_entry_t *entry; 1750 struct xfs_attr_leaf_entry *entry;
1517 int count, max, index, totallen, half; 1751 int count;
1518 int lastdelta, foundit, tmp; 1752 int max;
1519 1753 int index;
1520 /* 1754 int totallen = 0;
1521 * Set up environment. 1755 int half;
1522 */ 1756 int lastdelta;
1523 leaf1 = blk1->bp->b_addr; 1757 int foundit = 0;
1524 leaf2 = blk2->bp->b_addr; 1758 int tmp;
1525 hdr1 = &leaf1->hdr;
1526 hdr2 = &leaf2->hdr;
1527 foundit = 0;
1528 totallen = 0;
1529 1759
1530 /* 1760 /*
1531 * Examine entries until we reduce the absolute difference in 1761 * Examine entries until we reduce the absolute difference in
1532 * byte usage between the two blocks to a minimum. 1762 * byte usage between the two blocks to a minimum.
1533 */ 1763 */
1534 max = be16_to_cpu(hdr1->count) + be16_to_cpu(hdr2->count); 1764 max = ichdr1->count + ichdr2->count;
1535 half = (max+1) * sizeof(*entry); 1765 half = (max + 1) * sizeof(*entry);
1536 half += be16_to_cpu(hdr1->usedbytes) + 1766 half += ichdr1->usedbytes + ichdr2->usedbytes +
1537 be16_to_cpu(hdr2->usedbytes) + 1767 xfs_attr_leaf_newentsize(state->args->namelen,
1538 xfs_attr_leaf_newentsize( 1768 state->args->valuelen,
1539 state->args->namelen, 1769 state->blocksize, NULL);
1540 state->args->valuelen,
1541 state->blocksize, NULL);
1542 half /= 2; 1770 half /= 2;
1543 lastdelta = state->blocksize; 1771 lastdelta = state->blocksize;
1544 entry = &leaf1->entries[0]; 1772 entry = xfs_attr3_leaf_entryp(leaf1);
1545 for (count = index = 0; count < max; entry++, index++, count++) { 1773 for (count = index = 0; count < max; entry++, index++, count++) {
1546 1774
1547#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) 1775#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A))
@@ -1564,9 +1792,9 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
1564 /* 1792 /*
1565 * Wrap around into the second block if necessary. 1793 * Wrap around into the second block if necessary.
1566 */ 1794 */
1567 if (count == be16_to_cpu(hdr1->count)) { 1795 if (count == ichdr1->count) {
1568 leaf1 = leaf2; 1796 leaf1 = leaf2;
1569 entry = &leaf1->entries[0]; 1797 entry = xfs_attr3_leaf_entryp(leaf1);
1570 index = 0; 1798 index = 0;
1571 } 1799 }
1572 1800
@@ -1597,7 +1825,7 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
1597 1825
1598 *countarg = count; 1826 *countarg = count;
1599 *usedbytesarg = totallen; 1827 *usedbytesarg = totallen;
1600 return(foundit); 1828 return foundit;
1601} 1829}
1602 1830
1603/*======================================================================== 1831/*========================================================================
@@ -1616,14 +1844,20 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
1616 * GROT: allow for INCOMPLETE entries in calculation. 1844 * GROT: allow for INCOMPLETE entries in calculation.
1617 */ 1845 */
1618int 1846int
1619xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) 1847xfs_attr3_leaf_toosmall(
1848 struct xfs_da_state *state,
1849 int *action)
1620{ 1850{
1621 xfs_attr_leafblock_t *leaf; 1851 struct xfs_attr_leafblock *leaf;
1622 xfs_da_state_blk_t *blk; 1852 struct xfs_da_state_blk *blk;
1623 xfs_da_blkinfo_t *info; 1853 struct xfs_attr3_icleaf_hdr ichdr;
1624 int count, bytes, forward, error, retval, i; 1854 struct xfs_buf *bp;
1625 xfs_dablk_t blkno; 1855 xfs_dablk_t blkno;
1626 struct xfs_buf *bp; 1856 int bytes;
1857 int forward;
1858 int error;
1859 int retval;
1860 int i;
1627 1861
1628 trace_xfs_attr_leaf_toosmall(state->args); 1862 trace_xfs_attr_leaf_toosmall(state->args);
1629 1863
@@ -1633,13 +1867,11 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1633 * to coalesce with a sibling. 1867 * to coalesce with a sibling.
1634 */ 1868 */
1635 blk = &state->path.blk[ state->path.active-1 ]; 1869 blk = &state->path.blk[ state->path.active-1 ];
1636 info = blk->bp->b_addr; 1870 leaf = blk->bp->b_addr;
1637 ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1871 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
1638 leaf = (xfs_attr_leafblock_t *)info; 1872 bytes = xfs_attr3_leaf_hdr_size(leaf) +
1639 count = be16_to_cpu(leaf->hdr.count); 1873 ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
1640 bytes = sizeof(xfs_attr_leaf_hdr_t) + 1874 ichdr.usedbytes;
1641 count * sizeof(xfs_attr_leaf_entry_t) +
1642 be16_to_cpu(leaf->hdr.usedbytes);
1643 if (bytes > (state->blocksize >> 1)) { 1875 if (bytes > (state->blocksize >> 1)) {
1644 *action = 0; /* blk over 50%, don't try to join */ 1876 *action = 0; /* blk over 50%, don't try to join */
1645 return(0); 1877 return(0);
@@ -1651,14 +1883,14 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1651 * coalesce it with a sibling block. We choose (arbitrarily) 1883 * coalesce it with a sibling block. We choose (arbitrarily)
1652 * to merge with the forward block unless it is NULL. 1884 * to merge with the forward block unless it is NULL.
1653 */ 1885 */
1654 if (count == 0) { 1886 if (ichdr.count == 0) {
1655 /* 1887 /*
1656 * Make altpath point to the block we want to keep and 1888 * Make altpath point to the block we want to keep and
1657 * path point to the block we want to drop (this one). 1889 * path point to the block we want to drop (this one).
1658 */ 1890 */
1659 forward = (info->forw != 0); 1891 forward = (ichdr.forw != 0);
1660 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1892 memcpy(&state->altpath, &state->path, sizeof(state->path));
1661 error = xfs_da_path_shift(state, &state->altpath, forward, 1893 error = xfs_da3_path_shift(state, &state->altpath, forward,
1662 0, &retval); 1894 0, &retval);
1663 if (error) 1895 if (error)
1664 return(error); 1896 return(error);
@@ -1667,7 +1899,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1667 } else { 1899 } else {
1668 *action = 2; 1900 *action = 2;
1669 } 1901 }
1670 return(0); 1902 return 0;
1671 } 1903 }
1672 1904
1673 /* 1905 /*
@@ -1678,28 +1910,28 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1678 * to shrink an attribute list over time. 1910 * to shrink an attribute list over time.
1679 */ 1911 */
1680 /* start with smaller blk num */ 1912 /* start with smaller blk num */
1681 forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); 1913 forward = ichdr.forw < ichdr.back;
1682 for (i = 0; i < 2; forward = !forward, i++) { 1914 for (i = 0; i < 2; forward = !forward, i++) {
1915 struct xfs_attr3_icleaf_hdr ichdr2;
1683 if (forward) 1916 if (forward)
1684 blkno = be32_to_cpu(info->forw); 1917 blkno = ichdr.forw;
1685 else 1918 else
1686 blkno = be32_to_cpu(info->back); 1919 blkno = ichdr.back;
1687 if (blkno == 0) 1920 if (blkno == 0)
1688 continue; 1921 continue;
1689 error = xfs_attr_leaf_read(state->args->trans, state->args->dp, 1922 error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
1690 blkno, -1, &bp); 1923 blkno, -1, &bp);
1691 if (error) 1924 if (error)
1692 return(error); 1925 return(error);
1693 1926
1694 leaf = (xfs_attr_leafblock_t *)info; 1927 xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
1695 count = be16_to_cpu(leaf->hdr.count); 1928
1696 bytes = state->blocksize - (state->blocksize>>2); 1929 bytes = state->blocksize - (state->blocksize >> 2) -
1697 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1930 ichdr.usedbytes - ichdr2.usedbytes -
1698 leaf = bp->b_addr; 1931 ((ichdr.count + ichdr2.count) *
1699 count += be16_to_cpu(leaf->hdr.count); 1932 sizeof(xfs_attr_leaf_entry_t)) -
1700 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1933 xfs_attr3_leaf_hdr_size(leaf);
1701 bytes -= count * sizeof(xfs_attr_leaf_entry_t); 1934
1702 bytes -= sizeof(xfs_attr_leaf_hdr_t);
1703 xfs_trans_brelse(state->args->trans, bp); 1935 xfs_trans_brelse(state->args->trans, bp);
1704 if (bytes >= 0) 1936 if (bytes >= 0)
1705 break; /* fits with at least 25% to spare */ 1937 break; /* fits with at least 25% to spare */
@@ -1715,10 +1947,10 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1715 */ 1947 */
1716 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1948 memcpy(&state->altpath, &state->path, sizeof(state->path));
1717 if (blkno < blk->blkno) { 1949 if (blkno < blk->blkno) {
1718 error = xfs_da_path_shift(state, &state->altpath, forward, 1950 error = xfs_da3_path_shift(state, &state->altpath, forward,
1719 0, &retval); 1951 0, &retval);
1720 } else { 1952 } else {
1721 error = xfs_da_path_shift(state, &state->path, forward, 1953 error = xfs_da3_path_shift(state, &state->path, forward,
1722 0, &retval); 1954 0, &retval);
1723 } 1955 }
1724 if (error) 1956 if (error)
@@ -1738,32 +1970,35 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1738 * If two leaves are 37% full, when combined they will leave 25% free. 1970 * If two leaves are 37% full, when combined they will leave 25% free.
1739 */ 1971 */
1740int 1972int
1741xfs_attr_leaf_remove( 1973xfs_attr3_leaf_remove(
1742 struct xfs_buf *bp, 1974 struct xfs_buf *bp,
1743 xfs_da_args_t *args) 1975 struct xfs_da_args *args)
1744{ 1976{
1745 xfs_attr_leafblock_t *leaf; 1977 struct xfs_attr_leafblock *leaf;
1746 xfs_attr_leaf_hdr_t *hdr; 1978 struct xfs_attr3_icleaf_hdr ichdr;
1747 xfs_attr_leaf_map_t *map; 1979 struct xfs_attr_leaf_entry *entry;
1748 xfs_attr_leaf_entry_t *entry; 1980 struct xfs_mount *mp = args->trans->t_mountp;
1749 int before, after, smallest, entsize; 1981 int before;
1750 int tablesize, tmp, i; 1982 int after;
1751 xfs_mount_t *mp; 1983 int smallest;
1984 int entsize;
1985 int tablesize;
1986 int tmp;
1987 int i;
1752 1988
1753 trace_xfs_attr_leaf_remove(args); 1989 trace_xfs_attr_leaf_remove(args);
1754 1990
1755 leaf = bp->b_addr; 1991 leaf = bp->b_addr;
1756 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1992 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
1757 hdr = &leaf->hdr; 1993
1758 mp = args->trans->t_mountp; 1994 ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8);
1759 ASSERT((be16_to_cpu(hdr->count) > 0) 1995 ASSERT(args->index >= 0 && args->index < ichdr.count);
1760 && (be16_to_cpu(hdr->count) < (XFS_LBSIZE(mp)/8))); 1996 ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
1761 ASSERT((args->index >= 0) 1997 xfs_attr3_leaf_hdr_size(leaf));
1762 && (args->index < be16_to_cpu(hdr->count))); 1998
1763 ASSERT(be16_to_cpu(hdr->firstused) >= 1999 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
1764 ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); 2000
1765 entry = &leaf->entries[args->index]; 2001 ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
1766 ASSERT(be16_to_cpu(entry->nameidx) >= be16_to_cpu(hdr->firstused));
1767 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); 2002 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
1768 2003
1769 /* 2004 /*
@@ -1772,30 +2007,28 @@ xfs_attr_leaf_remove(
1772 * find smallest free region in case we need to replace it, 2007 * find smallest free region in case we need to replace it,
1773 * adjust any map that borders the entry table, 2008 * adjust any map that borders the entry table,
1774 */ 2009 */
1775 tablesize = be16_to_cpu(hdr->count) * sizeof(xfs_attr_leaf_entry_t) 2010 tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
1776 + sizeof(xfs_attr_leaf_hdr_t); 2011 + xfs_attr3_leaf_hdr_size(leaf);
1777 map = &hdr->freemap[0]; 2012 tmp = ichdr.freemap[0].size;
1778 tmp = be16_to_cpu(map->size);
1779 before = after = -1; 2013 before = after = -1;
1780 smallest = XFS_ATTR_LEAF_MAPSIZE - 1; 2014 smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
1781 entsize = xfs_attr_leaf_entsize(leaf, args->index); 2015 entsize = xfs_attr_leaf_entsize(leaf, args->index);
1782 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { 2016 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
1783 ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); 2017 ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp));
1784 ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); 2018 ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp));
1785 if (be16_to_cpu(map->base) == tablesize) { 2019 if (ichdr.freemap[i].base == tablesize) {
1786 be16_add_cpu(&map->base, 2020 ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
1787 -((int)sizeof(xfs_attr_leaf_entry_t))); 2021 ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
1788 be16_add_cpu(&map->size, sizeof(xfs_attr_leaf_entry_t));
1789 } 2022 }
1790 2023
1791 if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) 2024 if (ichdr.freemap[i].base + ichdr.freemap[i].size ==
1792 == be16_to_cpu(entry->nameidx)) { 2025 be16_to_cpu(entry->nameidx)) {
1793 before = i; 2026 before = i;
1794 } else if (be16_to_cpu(map->base) 2027 } else if (ichdr.freemap[i].base ==
1795 == (be16_to_cpu(entry->nameidx) + entsize)) { 2028 (be16_to_cpu(entry->nameidx) + entsize)) {
1796 after = i; 2029 after = i;
1797 } else if (be16_to_cpu(map->size) < tmp) { 2030 } else if (ichdr.freemap[i].size < tmp) {
1798 tmp = be16_to_cpu(map->size); 2031 tmp = ichdr.freemap[i].size;
1799 smallest = i; 2032 smallest = i;
1800 } 2033 }
1801 } 2034 }
@@ -1806,36 +2039,30 @@ xfs_attr_leaf_remove(
1806 */ 2039 */
1807 if ((before >= 0) || (after >= 0)) { 2040 if ((before >= 0) || (after >= 0)) {
1808 if ((before >= 0) && (after >= 0)) { 2041 if ((before >= 0) && (after >= 0)) {
1809 map = &hdr->freemap[before]; 2042 ichdr.freemap[before].size += entsize;
1810 be16_add_cpu(&map->size, entsize); 2043 ichdr.freemap[before].size += ichdr.freemap[after].size;
1811 be16_add_cpu(&map->size, 2044 ichdr.freemap[after].base = 0;
1812 be16_to_cpu(hdr->freemap[after].size)); 2045 ichdr.freemap[after].size = 0;
1813 hdr->freemap[after].base = 0;
1814 hdr->freemap[after].size = 0;
1815 } else if (before >= 0) { 2046 } else if (before >= 0) {
1816 map = &hdr->freemap[before]; 2047 ichdr.freemap[before].size += entsize;
1817 be16_add_cpu(&map->size, entsize);
1818 } else { 2048 } else {
1819 map = &hdr->freemap[after]; 2049 ichdr.freemap[after].base = be16_to_cpu(entry->nameidx);
1820 /* both on-disk, don't endian flip twice */ 2050 ichdr.freemap[after].size += entsize;
1821 map->base = entry->nameidx;
1822 be16_add_cpu(&map->size, entsize);
1823 } 2051 }
1824 } else { 2052 } else {
1825 /* 2053 /*
1826 * Replace smallest region (if it is smaller than free'd entry) 2054 * Replace smallest region (if it is smaller than free'd entry)
1827 */ 2055 */
1828 map = &hdr->freemap[smallest]; 2056 if (ichdr.freemap[smallest].size < entsize) {
1829 if (be16_to_cpu(map->size) < entsize) { 2057 ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx);
1830 map->base = cpu_to_be16(be16_to_cpu(entry->nameidx)); 2058 ichdr.freemap[smallest].size = entsize;
1831 map->size = cpu_to_be16(entsize);
1832 } 2059 }
1833 } 2060 }
1834 2061
1835 /* 2062 /*
1836 * Did we remove the first entry? 2063 * Did we remove the first entry?
1837 */ 2064 */
1838 if (be16_to_cpu(entry->nameidx) == be16_to_cpu(hdr->firstused)) 2065 if (be16_to_cpu(entry->nameidx) == ichdr.firstused)
1839 smallest = 1; 2066 smallest = 1;
1840 else 2067 else
1841 smallest = 0; 2068 smallest = 0;
@@ -1843,20 +2070,20 @@ xfs_attr_leaf_remove(
1843 /* 2070 /*
1844 * Compress the remaining entries and zero out the removed stuff. 2071 * Compress the remaining entries and zero out the removed stuff.
1845 */ 2072 */
1846 memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize); 2073 memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize);
1847 be16_add_cpu(&hdr->usedbytes, -entsize); 2074 ichdr.usedbytes -= entsize;
1848 xfs_trans_log_buf(args->trans, bp, 2075 xfs_trans_log_buf(args->trans, bp,
1849 XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), 2076 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
1850 entsize)); 2077 entsize));
1851 2078
1852 tmp = (be16_to_cpu(hdr->count) - args->index) 2079 tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t);
1853 * sizeof(xfs_attr_leaf_entry_t); 2080 memmove(entry, entry + 1, tmp);
1854 memmove((char *)entry, (char *)(entry+1), tmp); 2081 ichdr.count--;
1855 be16_add_cpu(&hdr->count, -1);
1856 xfs_trans_log_buf(args->trans, bp, 2082 xfs_trans_log_buf(args->trans, bp,
1857 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); 2083 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t)));
1858 entry = &leaf->entries[be16_to_cpu(hdr->count)]; 2084
1859 memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t)); 2085 entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count];
2086 memset(entry, 0, sizeof(xfs_attr_leaf_entry_t));
1860 2087
1861 /* 2088 /*
1862 * If we removed the first entry, re-find the first used byte 2089 * If we removed the first entry, re-find the first used byte
@@ -1866,130 +2093,130 @@ xfs_attr_leaf_remove(
1866 */ 2093 */
1867 if (smallest) { 2094 if (smallest) {
1868 tmp = XFS_LBSIZE(mp); 2095 tmp = XFS_LBSIZE(mp);
1869 entry = &leaf->entries[0]; 2096 entry = xfs_attr3_leaf_entryp(leaf);
1870 for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) { 2097 for (i = ichdr.count - 1; i >= 0; entry++, i--) {
1871 ASSERT(be16_to_cpu(entry->nameidx) >= 2098 ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
1872 be16_to_cpu(hdr->firstused));
1873 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); 2099 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
1874 2100
1875 if (be16_to_cpu(entry->nameidx) < tmp) 2101 if (be16_to_cpu(entry->nameidx) < tmp)
1876 tmp = be16_to_cpu(entry->nameidx); 2102 tmp = be16_to_cpu(entry->nameidx);
1877 } 2103 }
1878 hdr->firstused = cpu_to_be16(tmp); 2104 ichdr.firstused = tmp;
1879 if (!hdr->firstused) { 2105 if (!ichdr.firstused)
1880 hdr->firstused = cpu_to_be16( 2106 ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
1881 tmp - XFS_ATTR_LEAF_NAME_ALIGN);
1882 }
1883 } else { 2107 } else {
1884 hdr->holes = 1; /* mark as needing compaction */ 2108 ichdr.holes = 1; /* mark as needing compaction */
1885 } 2109 }
2110 xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
1886 xfs_trans_log_buf(args->trans, bp, 2111 xfs_trans_log_buf(args->trans, bp,
1887 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); 2112 XFS_DA_LOGRANGE(leaf, &leaf->hdr,
2113 xfs_attr3_leaf_hdr_size(leaf)));
1888 2114
1889 /* 2115 /*
1890 * Check if leaf is less than 50% full, caller may want to 2116 * Check if leaf is less than 50% full, caller may want to
1891 * "join" the leaf with a sibling if so. 2117 * "join" the leaf with a sibling if so.
1892 */ 2118 */
1893 tmp = sizeof(xfs_attr_leaf_hdr_t); 2119 tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
1894 tmp += be16_to_cpu(leaf->hdr.count) * sizeof(xfs_attr_leaf_entry_t); 2120 ichdr.count * sizeof(xfs_attr_leaf_entry_t);
1895 tmp += be16_to_cpu(leaf->hdr.usedbytes); 2121
1896 return(tmp < mp->m_attr_magicpct); /* leaf is < 37% full */ 2122 return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */
1897} 2123}
1898 2124
1899/* 2125/*
1900 * Move all the attribute list entries from drop_leaf into save_leaf. 2126 * Move all the attribute list entries from drop_leaf into save_leaf.
1901 */ 2127 */
1902void 2128void
1903xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 2129xfs_attr3_leaf_unbalance(
1904 xfs_da_state_blk_t *save_blk) 2130 struct xfs_da_state *state,
2131 struct xfs_da_state_blk *drop_blk,
2132 struct xfs_da_state_blk *save_blk)
1905{ 2133{
1906 xfs_attr_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf; 2134 struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr;
1907 xfs_attr_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr; 2135 struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr;
1908 xfs_mount_t *mp; 2136 struct xfs_attr3_icleaf_hdr drophdr;
1909 char *tmpbuffer; 2137 struct xfs_attr3_icleaf_hdr savehdr;
2138 struct xfs_attr_leaf_entry *entry;
2139 struct xfs_mount *mp = state->mp;
1910 2140
1911 trace_xfs_attr_leaf_unbalance(state->args); 2141 trace_xfs_attr_leaf_unbalance(state->args);
1912 2142
1913 /*
1914 * Set up environment.
1915 */
1916 mp = state->mp;
1917 ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
1918 ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1919 drop_leaf = drop_blk->bp->b_addr; 2143 drop_leaf = drop_blk->bp->b_addr;
1920 save_leaf = save_blk->bp->b_addr; 2144 save_leaf = save_blk->bp->b_addr;
1921 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2145 xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
1922 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2146 xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
1923 drop_hdr = &drop_leaf->hdr; 2147 entry = xfs_attr3_leaf_entryp(drop_leaf);
1924 save_hdr = &save_leaf->hdr;
1925 2148
1926 /* 2149 /*
1927 * Save last hashval from dying block for later Btree fixup. 2150 * Save last hashval from dying block for later Btree fixup.
1928 */ 2151 */
1929 drop_blk->hashval = be32_to_cpu( 2152 drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval);
1930 drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1].hashval);
1931 2153
1932 /* 2154 /*
1933 * Check if we need a temp buffer, or can we do it in place. 2155 * Check if we need a temp buffer, or can we do it in place.
1934 * Note that we don't check "leaf" for holes because we will 2156 * Note that we don't check "leaf" for holes because we will
1935 * always be dropping it, toosmall() decided that for us already. 2157 * always be dropping it, toosmall() decided that for us already.
1936 */ 2158 */
1937 if (save_hdr->holes == 0) { 2159 if (savehdr.holes == 0) {
1938 /* 2160 /*
1939 * dest leaf has no holes, so we add there. May need 2161 * dest leaf has no holes, so we add there. May need
1940 * to make some room in the entry array. 2162 * to make some room in the entry array.
1941 */ 2163 */
1942 if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { 2164 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
1943 xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0, 2165 drop_blk->bp, &drophdr)) {
1944 be16_to_cpu(drop_hdr->count), mp); 2166 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
2167 save_leaf, &savehdr, 0,
2168 drophdr.count, mp);
1945 } else { 2169 } else {
1946 xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 2170 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
1947 be16_to_cpu(save_hdr->count), 2171 save_leaf, &savehdr,
1948 be16_to_cpu(drop_hdr->count), mp); 2172 savehdr.count, drophdr.count, mp);
1949 } 2173 }
1950 } else { 2174 } else {
1951 /* 2175 /*
1952 * Destination has holes, so we make a temporary copy 2176 * Destination has holes, so we make a temporary copy
1953 * of the leaf and add them both to that. 2177 * of the leaf and add them both to that.
1954 */ 2178 */
1955 tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP); 2179 struct xfs_attr_leafblock *tmp_leaf;
1956 ASSERT(tmpbuffer != NULL); 2180 struct xfs_attr3_icleaf_hdr tmphdr;
1957 memset(tmpbuffer, 0, state->blocksize); 2181
1958 tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer; 2182 tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP);
1959 tmp_hdr = &tmp_leaf->hdr; 2183 memset(tmp_leaf, 0, state->blocksize);
1960 tmp_hdr->info = save_hdr->info; /* struct copy */ 2184 memset(&tmphdr, 0, sizeof(tmphdr));
1961 tmp_hdr->count = 0; 2185
1962 tmp_hdr->firstused = cpu_to_be16(state->blocksize); 2186 tmphdr.magic = savehdr.magic;
1963 if (!tmp_hdr->firstused) { 2187 tmphdr.forw = savehdr.forw;
1964 tmp_hdr->firstused = cpu_to_be16( 2188 tmphdr.back = savehdr.back;
1965 state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN); 2189 tmphdr.firstused = state->blocksize;
1966 } 2190 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
1967 tmp_hdr->usedbytes = 0; 2191 drop_blk->bp, &drophdr)) {
1968 if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { 2192 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
1969 xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0, 2193 tmp_leaf, &tmphdr, 0,
1970 be16_to_cpu(drop_hdr->count), mp); 2194 drophdr.count, mp);
1971 xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 2195 xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
1972 be16_to_cpu(tmp_leaf->hdr.count), 2196 tmp_leaf, &tmphdr, tmphdr.count,
1973 be16_to_cpu(save_hdr->count), mp); 2197 savehdr.count, mp);
1974 } else { 2198 } else {
1975 xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0, 2199 xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
1976 be16_to_cpu(save_hdr->count), mp); 2200 tmp_leaf, &tmphdr, 0,
1977 xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 2201 savehdr.count, mp);
1978 be16_to_cpu(tmp_leaf->hdr.count), 2202 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
1979 be16_to_cpu(drop_hdr->count), mp); 2203 tmp_leaf, &tmphdr, tmphdr.count,
2204 drophdr.count, mp);
1980 } 2205 }
1981 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); 2206 memcpy(save_leaf, tmp_leaf, state->blocksize);
1982 kmem_free(tmpbuffer); 2207 savehdr = tmphdr; /* struct copy */
2208 kmem_free(tmp_leaf);
1983 } 2209 }
1984 2210
2211 xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
1985 xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, 2212 xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
1986 state->blocksize - 1); 2213 state->blocksize - 1);
1987 2214
1988 /* 2215 /*
1989 * Copy out last hashval in each block for B-tree code. 2216 * Copy out last hashval in each block for B-tree code.
1990 */ 2217 */
1991 save_blk->hashval = be32_to_cpu( 2218 entry = xfs_attr3_leaf_entryp(save_leaf);
1992 save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1].hashval); 2219 save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval);
1993} 2220}
1994 2221
1995/*======================================================================== 2222/*========================================================================
@@ -2010,31 +2237,33 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
2010 * Don't change the args->value unless we find the attribute. 2237 * Don't change the args->value unless we find the attribute.
2011 */ 2238 */
2012int 2239int
2013xfs_attr_leaf_lookup_int( 2240xfs_attr3_leaf_lookup_int(
2014 struct xfs_buf *bp, 2241 struct xfs_buf *bp,
2015 xfs_da_args_t *args) 2242 struct xfs_da_args *args)
2016{ 2243{
2017 xfs_attr_leafblock_t *leaf; 2244 struct xfs_attr_leafblock *leaf;
2018 xfs_attr_leaf_entry_t *entry; 2245 struct xfs_attr3_icleaf_hdr ichdr;
2019 xfs_attr_leaf_name_local_t *name_loc; 2246 struct xfs_attr_leaf_entry *entry;
2020 xfs_attr_leaf_name_remote_t *name_rmt; 2247 struct xfs_attr_leaf_entry *entries;
2021 int probe, span; 2248 struct xfs_attr_leaf_name_local *name_loc;
2022 xfs_dahash_t hashval; 2249 struct xfs_attr_leaf_name_remote *name_rmt;
2250 xfs_dahash_t hashval;
2251 int probe;
2252 int span;
2023 2253
2024 trace_xfs_attr_leaf_lookup(args); 2254 trace_xfs_attr_leaf_lookup(args);
2025 2255
2026 leaf = bp->b_addr; 2256 leaf = bp->b_addr;
2027 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2257 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2028 ASSERT(be16_to_cpu(leaf->hdr.count) 2258 entries = xfs_attr3_leaf_entryp(leaf);
2029 < (XFS_LBSIZE(args->dp->i_mount)/8)); 2259 ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
2030 2260
2031 /* 2261 /*
2032 * Binary search. (note: small blocks will skip this loop) 2262 * Binary search. (note: small blocks will skip this loop)
2033 */ 2263 */
2034 hashval = args->hashval; 2264 hashval = args->hashval;
2035 probe = span = be16_to_cpu(leaf->hdr.count) / 2; 2265 probe = span = ichdr.count / 2;
2036 for (entry = &leaf->entries[probe]; span > 4; 2266 for (entry = &entries[probe]; span > 4; entry = &entries[probe]) {
2037 entry = &leaf->entries[probe]) {
2038 span /= 2; 2267 span /= 2;
2039 if (be32_to_cpu(entry->hashval) < hashval) 2268 if (be32_to_cpu(entry->hashval) < hashval)
2040 probe += span; 2269 probe += span;
@@ -2043,35 +2272,31 @@ xfs_attr_leaf_lookup_int(
2043 else 2272 else
2044 break; 2273 break;
2045 } 2274 }
2046 ASSERT((probe >= 0) && 2275 ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count));
2047 (!leaf->hdr.count 2276 ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval);
2048 || (probe < be16_to_cpu(leaf->hdr.count))));
2049 ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval));
2050 2277
2051 /* 2278 /*
2052 * Since we may have duplicate hashval's, find the first matching 2279 * Since we may have duplicate hashval's, find the first matching
2053 * hashval in the leaf. 2280 * hashval in the leaf.
2054 */ 2281 */
2055 while ((probe > 0) && (be32_to_cpu(entry->hashval) >= hashval)) { 2282 while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) {
2056 entry--; 2283 entry--;
2057 probe--; 2284 probe--;
2058 } 2285 }
2059 while ((probe < be16_to_cpu(leaf->hdr.count)) && 2286 while (probe < ichdr.count &&
2060 (be32_to_cpu(entry->hashval) < hashval)) { 2287 be32_to_cpu(entry->hashval) < hashval) {
2061 entry++; 2288 entry++;
2062 probe++; 2289 probe++;
2063 } 2290 }
2064 if ((probe == be16_to_cpu(leaf->hdr.count)) || 2291 if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
2065 (be32_to_cpu(entry->hashval) != hashval)) {
2066 args->index = probe; 2292 args->index = probe;
2067 return(XFS_ERROR(ENOATTR)); 2293 return XFS_ERROR(ENOATTR);
2068 } 2294 }
2069 2295
2070 /* 2296 /*
2071 * Duplicate keys may be present, so search all of them for a match. 2297 * Duplicate keys may be present, so search all of them for a match.
2072 */ 2298 */
2073 for ( ; (probe < be16_to_cpu(leaf->hdr.count)) && 2299 for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval);
2074 (be32_to_cpu(entry->hashval) == hashval);
2075 entry++, probe++) { 2300 entry++, probe++) {
2076/* 2301/*
2077 * GROT: Add code to remove incomplete entries. 2302 * GROT: Add code to remove incomplete entries.
@@ -2085,21 +2310,22 @@ xfs_attr_leaf_lookup_int(
2085 continue; 2310 continue;
2086 } 2311 }
2087 if (entry->flags & XFS_ATTR_LOCAL) { 2312 if (entry->flags & XFS_ATTR_LOCAL) {
2088 name_loc = xfs_attr_leaf_name_local(leaf, probe); 2313 name_loc = xfs_attr3_leaf_name_local(leaf, probe);
2089 if (name_loc->namelen != args->namelen) 2314 if (name_loc->namelen != args->namelen)
2090 continue; 2315 continue;
2091 if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0) 2316 if (memcmp(args->name, name_loc->nameval,
2317 args->namelen) != 0)
2092 continue; 2318 continue;
2093 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2319 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2094 continue; 2320 continue;
2095 args->index = probe; 2321 args->index = probe;
2096 return(XFS_ERROR(EEXIST)); 2322 return XFS_ERROR(EEXIST);
2097 } else { 2323 } else {
2098 name_rmt = xfs_attr_leaf_name_remote(leaf, probe); 2324 name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
2099 if (name_rmt->namelen != args->namelen) 2325 if (name_rmt->namelen != args->namelen)
2100 continue; 2326 continue;
2101 if (memcmp(args->name, (char *)name_rmt->name, 2327 if (memcmp(args->name, name_rmt->name,
2102 args->namelen) != 0) 2328 args->namelen) != 0)
2103 continue; 2329 continue;
2104 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2330 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2105 continue; 2331 continue;
@@ -2107,11 +2333,11 @@ xfs_attr_leaf_lookup_int(
2107 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2333 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2108 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, 2334 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount,
2109 be32_to_cpu(name_rmt->valuelen)); 2335 be32_to_cpu(name_rmt->valuelen));
2110 return(XFS_ERROR(EEXIST)); 2336 return XFS_ERROR(EEXIST);
2111 } 2337 }
2112 } 2338 }
2113 args->index = probe; 2339 args->index = probe;
2114 return(XFS_ERROR(ENOATTR)); 2340 return XFS_ERROR(ENOATTR);
2115} 2341}
2116 2342
2117/* 2343/*
@@ -2119,40 +2345,40 @@ xfs_attr_leaf_lookup_int(
2119 * list structure. 2345 * list structure.
2120 */ 2346 */
2121int 2347int
2122xfs_attr_leaf_getvalue( 2348xfs_attr3_leaf_getvalue(
2123 struct xfs_buf *bp, 2349 struct xfs_buf *bp,
2124 xfs_da_args_t *args) 2350 struct xfs_da_args *args)
2125{ 2351{
2126 int valuelen; 2352 struct xfs_attr_leafblock *leaf;
2127 xfs_attr_leafblock_t *leaf; 2353 struct xfs_attr3_icleaf_hdr ichdr;
2128 xfs_attr_leaf_entry_t *entry; 2354 struct xfs_attr_leaf_entry *entry;
2129 xfs_attr_leaf_name_local_t *name_loc; 2355 struct xfs_attr_leaf_name_local *name_loc;
2130 xfs_attr_leaf_name_remote_t *name_rmt; 2356 struct xfs_attr_leaf_name_remote *name_rmt;
2357 int valuelen;
2131 2358
2132 leaf = bp->b_addr; 2359 leaf = bp->b_addr;
2133 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2360 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2134 ASSERT(be16_to_cpu(leaf->hdr.count) 2361 ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
2135 < (XFS_LBSIZE(args->dp->i_mount)/8)); 2362 ASSERT(args->index < ichdr.count);
2136 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
2137 2363
2138 entry = &leaf->entries[args->index]; 2364 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
2139 if (entry->flags & XFS_ATTR_LOCAL) { 2365 if (entry->flags & XFS_ATTR_LOCAL) {
2140 name_loc = xfs_attr_leaf_name_local(leaf, args->index); 2366 name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
2141 ASSERT(name_loc->namelen == args->namelen); 2367 ASSERT(name_loc->namelen == args->namelen);
2142 ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); 2368 ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
2143 valuelen = be16_to_cpu(name_loc->valuelen); 2369 valuelen = be16_to_cpu(name_loc->valuelen);
2144 if (args->flags & ATTR_KERNOVAL) { 2370 if (args->flags & ATTR_KERNOVAL) {
2145 args->valuelen = valuelen; 2371 args->valuelen = valuelen;
2146 return(0); 2372 return 0;
2147 } 2373 }
2148 if (args->valuelen < valuelen) { 2374 if (args->valuelen < valuelen) {
2149 args->valuelen = valuelen; 2375 args->valuelen = valuelen;
2150 return(XFS_ERROR(ERANGE)); 2376 return XFS_ERROR(ERANGE);
2151 } 2377 }
2152 args->valuelen = valuelen; 2378 args->valuelen = valuelen;
2153 memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); 2379 memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
2154 } else { 2380 } else {
2155 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2381 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2156 ASSERT(name_rmt->namelen == args->namelen); 2382 ASSERT(name_rmt->namelen == args->namelen);
2157 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2383 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2158 valuelen = be32_to_cpu(name_rmt->valuelen); 2384 valuelen = be32_to_cpu(name_rmt->valuelen);
@@ -2160,15 +2386,15 @@ xfs_attr_leaf_getvalue(
2160 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen); 2386 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
2161 if (args->flags & ATTR_KERNOVAL) { 2387 if (args->flags & ATTR_KERNOVAL) {
2162 args->valuelen = valuelen; 2388 args->valuelen = valuelen;
2163 return(0); 2389 return 0;
2164 } 2390 }
2165 if (args->valuelen < valuelen) { 2391 if (args->valuelen < valuelen) {
2166 args->valuelen = valuelen; 2392 args->valuelen = valuelen;
2167 return(XFS_ERROR(ERANGE)); 2393 return XFS_ERROR(ERANGE);
2168 } 2394 }
2169 args->valuelen = valuelen; 2395 args->valuelen = valuelen;
2170 } 2396 }
2171 return(0); 2397 return 0;
2172} 2398}
2173 2399
2174/*======================================================================== 2400/*========================================================================
@@ -2181,13 +2407,21 @@ xfs_attr_leaf_getvalue(
2181 */ 2407 */
2182/*ARGSUSED*/ 2408/*ARGSUSED*/
2183STATIC void 2409STATIC void
2184xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, 2410xfs_attr3_leaf_moveents(
2185 xfs_attr_leafblock_t *leaf_d, int start_d, 2411 struct xfs_attr_leafblock *leaf_s,
2186 int count, xfs_mount_t *mp) 2412 struct xfs_attr3_icleaf_hdr *ichdr_s,
2413 int start_s,
2414 struct xfs_attr_leafblock *leaf_d,
2415 struct xfs_attr3_icleaf_hdr *ichdr_d,
2416 int start_d,
2417 int count,
2418 struct xfs_mount *mp)
2187{ 2419{
2188 xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; 2420 struct xfs_attr_leaf_entry *entry_s;
2189 xfs_attr_leaf_entry_t *entry_s, *entry_d; 2421 struct xfs_attr_leaf_entry *entry_d;
2190 int desti, tmp, i; 2422 int desti;
2423 int tmp;
2424 int i;
2191 2425
2192 /* 2426 /*
2193 * Check for nothing to do. 2427 * Check for nothing to do.
@@ -2198,45 +2432,41 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2198 /* 2432 /*
2199 * Set up environment. 2433 * Set up environment.
2200 */ 2434 */
2201 ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2435 ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||
2202 ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2436 ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
2203 hdr_s = &leaf_s->hdr; 2437 ASSERT(ichdr_s->magic == ichdr_d->magic);
2204 hdr_d = &leaf_d->hdr; 2438 ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8);
2205 ASSERT((be16_to_cpu(hdr_s->count) > 0) && 2439 ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
2206 (be16_to_cpu(hdr_s->count) < (XFS_LBSIZE(mp)/8))); 2440 + xfs_attr3_leaf_hdr_size(leaf_s));
2207 ASSERT(be16_to_cpu(hdr_s->firstused) >= 2441 ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8);
2208 ((be16_to_cpu(hdr_s->count) 2442 ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
2209 * sizeof(*entry_s))+sizeof(*hdr_s))); 2443 + xfs_attr3_leaf_hdr_size(leaf_d));
2210 ASSERT(be16_to_cpu(hdr_d->count) < (XFS_LBSIZE(mp)/8)); 2444
2211 ASSERT(be16_to_cpu(hdr_d->firstused) >= 2445 ASSERT(start_s < ichdr_s->count);
2212 ((be16_to_cpu(hdr_d->count) 2446 ASSERT(start_d <= ichdr_d->count);
2213 * sizeof(*entry_d))+sizeof(*hdr_d))); 2447 ASSERT(count <= ichdr_s->count);
2214 2448
2215 ASSERT(start_s < be16_to_cpu(hdr_s->count));
2216 ASSERT(start_d <= be16_to_cpu(hdr_d->count));
2217 ASSERT(count <= be16_to_cpu(hdr_s->count));
2218 2449
2219 /* 2450 /*
2220 * Move the entries in the destination leaf up to make a hole? 2451 * Move the entries in the destination leaf up to make a hole?
2221 */ 2452 */
2222 if (start_d < be16_to_cpu(hdr_d->count)) { 2453 if (start_d < ichdr_d->count) {
2223 tmp = be16_to_cpu(hdr_d->count) - start_d; 2454 tmp = ichdr_d->count - start_d;
2224 tmp *= sizeof(xfs_attr_leaf_entry_t); 2455 tmp *= sizeof(xfs_attr_leaf_entry_t);
2225 entry_s = &leaf_d->entries[start_d]; 2456 entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
2226 entry_d = &leaf_d->entries[start_d + count]; 2457 entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count];
2227 memmove((char *)entry_d, (char *)entry_s, tmp); 2458 memmove(entry_d, entry_s, tmp);
2228 } 2459 }
2229 2460
2230 /* 2461 /*
2231 * Copy all entry's in the same (sorted) order, 2462 * Copy all entry's in the same (sorted) order,
2232 * but allocate attribute info packed and in sequence. 2463 * but allocate attribute info packed and in sequence.
2233 */ 2464 */
2234 entry_s = &leaf_s->entries[start_s]; 2465 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
2235 entry_d = &leaf_d->entries[start_d]; 2466 entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
2236 desti = start_d; 2467 desti = start_d;
2237 for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { 2468 for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
2238 ASSERT(be16_to_cpu(entry_s->nameidx) 2469 ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused);
2239 >= be16_to_cpu(hdr_s->firstused));
2240 tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); 2470 tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
2241#ifdef GROT 2471#ifdef GROT
2242 /* 2472 /*
@@ -2245,36 +2475,34 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2245 * off for 6.2, should be revisited later. 2475 * off for 6.2, should be revisited later.
2246 */ 2476 */
2247 if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ 2477 if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
2248 memset(xfs_attr_leaf_name(leaf_s, start_s + i), 0, tmp); 2478 memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
2249 be16_add_cpu(&hdr_s->usedbytes, -tmp); 2479 ichdr_s->usedbytes -= tmp;
2250 be16_add_cpu(&hdr_s->count, -1); 2480 ichdr_s->count -= 1;
2251 entry_d--; /* to compensate for ++ in loop hdr */ 2481 entry_d--; /* to compensate for ++ in loop hdr */
2252 desti--; 2482 desti--;
2253 if ((start_s + i) < offset) 2483 if ((start_s + i) < offset)
2254 result++; /* insertion index adjustment */ 2484 result++; /* insertion index adjustment */
2255 } else { 2485 } else {
2256#endif /* GROT */ 2486#endif /* GROT */
2257 be16_add_cpu(&hdr_d->firstused, -tmp); 2487 ichdr_d->firstused -= tmp;
2258 /* both on-disk, don't endian flip twice */ 2488 /* both on-disk, don't endian flip twice */
2259 entry_d->hashval = entry_s->hashval; 2489 entry_d->hashval = entry_s->hashval;
2260 /* both on-disk, don't endian flip twice */ 2490 entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);
2261 entry_d->nameidx = hdr_d->firstused;
2262 entry_d->flags = entry_s->flags; 2491 entry_d->flags = entry_s->flags;
2263 ASSERT(be16_to_cpu(entry_d->nameidx) + tmp 2492 ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
2264 <= XFS_LBSIZE(mp)); 2493 <= XFS_LBSIZE(mp));
2265 memmove(xfs_attr_leaf_name(leaf_d, desti), 2494 memmove(xfs_attr3_leaf_name(leaf_d, desti),
2266 xfs_attr_leaf_name(leaf_s, start_s + i), tmp); 2495 xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);
2267 ASSERT(be16_to_cpu(entry_s->nameidx) + tmp 2496 ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
2268 <= XFS_LBSIZE(mp)); 2497 <= XFS_LBSIZE(mp));
2269 memset(xfs_attr_leaf_name(leaf_s, start_s + i), 0, tmp); 2498 memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
2270 be16_add_cpu(&hdr_s->usedbytes, -tmp); 2499 ichdr_s->usedbytes -= tmp;
2271 be16_add_cpu(&hdr_d->usedbytes, tmp); 2500 ichdr_d->usedbytes += tmp;
2272 be16_add_cpu(&hdr_s->count, -1); 2501 ichdr_s->count -= 1;
2273 be16_add_cpu(&hdr_d->count, 1); 2502 ichdr_d->count += 1;
2274 tmp = be16_to_cpu(hdr_d->count) 2503 tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t)
2275 * sizeof(xfs_attr_leaf_entry_t) 2504 + xfs_attr3_leaf_hdr_size(leaf_d);
2276 + sizeof(xfs_attr_leaf_hdr_t); 2505 ASSERT(ichdr_d->firstused >= tmp);
2277 ASSERT(be16_to_cpu(hdr_d->firstused) >= tmp);
2278#ifdef GROT 2506#ifdef GROT
2279 } 2507 }
2280#endif /* GROT */ 2508#endif /* GROT */
@@ -2283,71 +2511,40 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2283 /* 2511 /*
2284 * Zero out the entries we just copied. 2512 * Zero out the entries we just copied.
2285 */ 2513 */
2286 if (start_s == be16_to_cpu(hdr_s->count)) { 2514 if (start_s == ichdr_s->count) {
2287 tmp = count * sizeof(xfs_attr_leaf_entry_t); 2515 tmp = count * sizeof(xfs_attr_leaf_entry_t);
2288 entry_s = &leaf_s->entries[start_s]; 2516 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
2289 ASSERT(((char *)entry_s + tmp) <= 2517 ASSERT(((char *)entry_s + tmp) <=
2290 ((char *)leaf_s + XFS_LBSIZE(mp))); 2518 ((char *)leaf_s + XFS_LBSIZE(mp)));
2291 memset((char *)entry_s, 0, tmp); 2519 memset(entry_s, 0, tmp);
2292 } else { 2520 } else {
2293 /* 2521 /*
2294 * Move the remaining entries down to fill the hole, 2522 * Move the remaining entries down to fill the hole,
2295 * then zero the entries at the top. 2523 * then zero the entries at the top.
2296 */ 2524 */
2297 tmp = be16_to_cpu(hdr_s->count) - count; 2525 tmp = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t);
2298 tmp *= sizeof(xfs_attr_leaf_entry_t); 2526 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count];
2299 entry_s = &leaf_s->entries[start_s + count]; 2527 entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
2300 entry_d = &leaf_s->entries[start_s]; 2528 memmove(entry_d, entry_s, tmp);
2301 memmove((char *)entry_d, (char *)entry_s, tmp);
2302 2529
2303 tmp = count * sizeof(xfs_attr_leaf_entry_t); 2530 tmp = count * sizeof(xfs_attr_leaf_entry_t);
2304 entry_s = &leaf_s->entries[be16_to_cpu(hdr_s->count)]; 2531 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];
2305 ASSERT(((char *)entry_s + tmp) <= 2532 ASSERT(((char *)entry_s + tmp) <=
2306 ((char *)leaf_s + XFS_LBSIZE(mp))); 2533 ((char *)leaf_s + XFS_LBSIZE(mp)));
2307 memset((char *)entry_s, 0, tmp); 2534 memset(entry_s, 0, tmp);
2308 } 2535 }
2309 2536
2310 /* 2537 /*
2311 * Fill in the freemap information 2538 * Fill in the freemap information
2312 */ 2539 */
2313 hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); 2540 ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d);
2314 be16_add_cpu(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * 2541 ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t);
2315 sizeof(xfs_attr_leaf_entry_t)); 2542 ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
2316 hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) 2543 ichdr_d->freemap[1].base = 0;
2317 - be16_to_cpu(hdr_d->freemap[0].base)); 2544 ichdr_d->freemap[2].base = 0;
2318 hdr_d->freemap[1].base = 0; 2545 ichdr_d->freemap[1].size = 0;
2319 hdr_d->freemap[2].base = 0; 2546 ichdr_d->freemap[2].size = 0;
2320 hdr_d->freemap[1].size = 0; 2547 ichdr_s->holes = 1; /* leaf may not be compact */
2321 hdr_d->freemap[2].size = 0;
2322 hdr_s->holes = 1; /* leaf may not be compact */
2323}
2324
2325/*
2326 * Compare two leaf blocks "order".
2327 * Return 0 unless leaf2 should go before leaf1.
2328 */
2329int
2330xfs_attr_leaf_order(
2331 struct xfs_buf *leaf1_bp,
2332 struct xfs_buf *leaf2_bp)
2333{
2334 xfs_attr_leafblock_t *leaf1, *leaf2;
2335
2336 leaf1 = leaf1_bp->b_addr;
2337 leaf2 = leaf2_bp->b_addr;
2338 ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
2339 (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
2340 if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
2341 (be16_to_cpu(leaf2->hdr.count) > 0) &&
2342 ((be32_to_cpu(leaf2->entries[0].hashval) <
2343 be32_to_cpu(leaf1->entries[0].hashval)) ||
2344 (be32_to_cpu(leaf2->entries[
2345 be16_to_cpu(leaf2->hdr.count)-1].hashval) <
2346 be32_to_cpu(leaf1->entries[
2347 be16_to_cpu(leaf1->hdr.count)-1].hashval)))) {
2348 return(1);
2349 }
2350 return(0);
2351} 2548}
2352 2549
2353/* 2550/*
@@ -2358,15 +2555,16 @@ xfs_attr_leaf_lasthash(
2358 struct xfs_buf *bp, 2555 struct xfs_buf *bp,
2359 int *count) 2556 int *count)
2360{ 2557{
2361 xfs_attr_leafblock_t *leaf; 2558 struct xfs_attr3_icleaf_hdr ichdr;
2559 struct xfs_attr_leaf_entry *entries;
2362 2560
2363 leaf = bp->b_addr; 2561 xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
2364 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2562 entries = xfs_attr3_leaf_entryp(bp->b_addr);
2365 if (count) 2563 if (count)
2366 *count = be16_to_cpu(leaf->hdr.count); 2564 *count = ichdr.count;
2367 if (!leaf->hdr.count) 2565 if (!ichdr.count)
2368 return(0); 2566 return 0;
2369 return be32_to_cpu(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval); 2567 return be32_to_cpu(entries[ichdr.count - 1].hashval);
2370} 2568}
2371 2569
2372/* 2570/*
@@ -2376,20 +2574,21 @@ xfs_attr_leaf_lasthash(
2376STATIC int 2574STATIC int
2377xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) 2575xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
2378{ 2576{
2577 struct xfs_attr_leaf_entry *entries;
2379 xfs_attr_leaf_name_local_t *name_loc; 2578 xfs_attr_leaf_name_local_t *name_loc;
2380 xfs_attr_leaf_name_remote_t *name_rmt; 2579 xfs_attr_leaf_name_remote_t *name_rmt;
2381 int size; 2580 int size;
2382 2581
2383 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2582 entries = xfs_attr3_leaf_entryp(leaf);
2384 if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { 2583 if (entries[index].flags & XFS_ATTR_LOCAL) {
2385 name_loc = xfs_attr_leaf_name_local(leaf, index); 2584 name_loc = xfs_attr3_leaf_name_local(leaf, index);
2386 size = xfs_attr_leaf_entsize_local(name_loc->namelen, 2585 size = xfs_attr_leaf_entsize_local(name_loc->namelen,
2387 be16_to_cpu(name_loc->valuelen)); 2586 be16_to_cpu(name_loc->valuelen));
2388 } else { 2587 } else {
2389 name_rmt = xfs_attr_leaf_name_remote(leaf, index); 2588 name_rmt = xfs_attr3_leaf_name_remote(leaf, index);
2390 size = xfs_attr_leaf_entsize_remote(name_rmt->namelen); 2589 size = xfs_attr_leaf_entsize_remote(name_rmt->namelen);
2391 } 2590 }
2392 return(size); 2591 return size;
2393} 2592}
2394 2593
2395/* 2594/*
@@ -2414,35 +2613,40 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
2414 *local = 0; 2613 *local = 0;
2415 } 2614 }
2416 } 2615 }
2417 return(size); 2616 return size;
2418} 2617}
2419 2618
2420/* 2619/*
2421 * Copy out attribute list entries for attr_list(), for leaf attribute lists. 2620 * Copy out attribute list entries for attr_list(), for leaf attribute lists.
2422 */ 2621 */
2423int 2622int
2424xfs_attr_leaf_list_int( 2623xfs_attr3_leaf_list_int(
2425 struct xfs_buf *bp, 2624 struct xfs_buf *bp,
2426 xfs_attr_list_context_t *context) 2625 struct xfs_attr_list_context *context)
2427{ 2626{
2428 attrlist_cursor_kern_t *cursor; 2627 struct attrlist_cursor_kern *cursor;
2429 xfs_attr_leafblock_t *leaf; 2628 struct xfs_attr_leafblock *leaf;
2430 xfs_attr_leaf_entry_t *entry; 2629 struct xfs_attr3_icleaf_hdr ichdr;
2431 int retval, i; 2630 struct xfs_attr_leaf_entry *entries;
2631 struct xfs_attr_leaf_entry *entry;
2632 int retval;
2633 int i;
2634
2635 trace_xfs_attr_list_leaf(context);
2432 2636
2433 ASSERT(bp != NULL);
2434 leaf = bp->b_addr; 2637 leaf = bp->b_addr;
2638 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2639 entries = xfs_attr3_leaf_entryp(leaf);
2640
2435 cursor = context->cursor; 2641 cursor = context->cursor;
2436 cursor->initted = 1; 2642 cursor->initted = 1;
2437 2643
2438 trace_xfs_attr_list_leaf(context);
2439
2440 /* 2644 /*
2441 * Re-find our place in the leaf block if this is a new syscall. 2645 * Re-find our place in the leaf block if this is a new syscall.
2442 */ 2646 */
2443 if (context->resynch) { 2647 if (context->resynch) {
2444 entry = &leaf->entries[0]; 2648 entry = &entries[0];
2445 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 2649 for (i = 0; i < ichdr.count; entry++, i++) {
2446 if (be32_to_cpu(entry->hashval) == cursor->hashval) { 2650 if (be32_to_cpu(entry->hashval) == cursor->hashval) {
2447 if (cursor->offset == context->dupcnt) { 2651 if (cursor->offset == context->dupcnt) {
2448 context->dupcnt = 0; 2652 context->dupcnt = 0;
@@ -2455,12 +2659,12 @@ xfs_attr_leaf_list_int(
2455 break; 2659 break;
2456 } 2660 }
2457 } 2661 }
2458 if (i == be16_to_cpu(leaf->hdr.count)) { 2662 if (i == ichdr.count) {
2459 trace_xfs_attr_list_notfound(context); 2663 trace_xfs_attr_list_notfound(context);
2460 return(0); 2664 return 0;
2461 } 2665 }
2462 } else { 2666 } else {
2463 entry = &leaf->entries[0]; 2667 entry = &entries[0];
2464 i = 0; 2668 i = 0;
2465 } 2669 }
2466 context->resynch = 0; 2670 context->resynch = 0;
@@ -2469,7 +2673,7 @@ xfs_attr_leaf_list_int(
2469 * We have found our place, start copying out the new attributes. 2673 * We have found our place, start copying out the new attributes.
2470 */ 2674 */
2471 retval = 0; 2675 retval = 0;
2472 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { 2676 for (; i < ichdr.count; entry++, i++) {
2473 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 2677 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
2474 cursor->hashval = be32_to_cpu(entry->hashval); 2678 cursor->hashval = be32_to_cpu(entry->hashval);
2475 cursor->offset = 0; 2679 cursor->offset = 0;
@@ -2480,7 +2684,7 @@ xfs_attr_leaf_list_int(
2480 2684
2481 if (entry->flags & XFS_ATTR_LOCAL) { 2685 if (entry->flags & XFS_ATTR_LOCAL) {
2482 xfs_attr_leaf_name_local_t *name_loc = 2686 xfs_attr_leaf_name_local_t *name_loc =
2483 xfs_attr_leaf_name_local(leaf, i); 2687 xfs_attr3_leaf_name_local(leaf, i);
2484 2688
2485 retval = context->put_listent(context, 2689 retval = context->put_listent(context,
2486 entry->flags, 2690 entry->flags,
@@ -2492,7 +2696,7 @@ xfs_attr_leaf_list_int(
2492 return retval; 2696 return retval;
2493 } else { 2697 } else {
2494 xfs_attr_leaf_name_remote_t *name_rmt = 2698 xfs_attr_leaf_name_remote_t *name_rmt =
2495 xfs_attr_leaf_name_remote(leaf, i); 2699 xfs_attr3_leaf_name_remote(leaf, i);
2496 2700
2497 int valuelen = be32_to_cpu(name_rmt->valuelen); 2701 int valuelen = be32_to_cpu(name_rmt->valuelen);
2498 2702
@@ -2532,7 +2736,7 @@ xfs_attr_leaf_list_int(
2532 cursor->offset++; 2736 cursor->offset++;
2533 } 2737 }
2534 trace_xfs_attr_list_leaf_end(context); 2738 trace_xfs_attr_list_leaf_end(context);
2535 return(retval); 2739 return retval;
2536} 2740}
2537 2741
2538 2742
@@ -2544,14 +2748,16 @@ xfs_attr_leaf_list_int(
2544 * Clear the INCOMPLETE flag on an entry in a leaf block. 2748 * Clear the INCOMPLETE flag on an entry in a leaf block.
2545 */ 2749 */
2546int 2750int
2547xfs_attr_leaf_clearflag(xfs_da_args_t *args) 2751xfs_attr3_leaf_clearflag(
2752 struct xfs_da_args *args)
2548{ 2753{
2549 xfs_attr_leafblock_t *leaf; 2754 struct xfs_attr_leafblock *leaf;
2550 xfs_attr_leaf_entry_t *entry; 2755 struct xfs_attr_leaf_entry *entry;
2551 xfs_attr_leaf_name_remote_t *name_rmt; 2756 struct xfs_attr_leaf_name_remote *name_rmt;
2552 struct xfs_buf *bp; 2757 struct xfs_buf *bp;
2553 int error; 2758 int error;
2554#ifdef DEBUG 2759#ifdef DEBUG
2760 struct xfs_attr3_icleaf_hdr ichdr;
2555 xfs_attr_leaf_name_local_t *name_loc; 2761 xfs_attr_leaf_name_local_t *name_loc;
2556 int namelen; 2762 int namelen;
2557 char *name; 2763 char *name;
@@ -2561,23 +2767,25 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2561 /* 2767 /*
2562 * Set up the operation. 2768 * Set up the operation.
2563 */ 2769 */
2564 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 2770 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
2565 if (error) 2771 if (error)
2566 return(error); 2772 return(error);
2567 2773
2568 leaf = bp->b_addr; 2774 leaf = bp->b_addr;
2569 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2775 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
2570 ASSERT(args->index >= 0);
2571 entry = &leaf->entries[ args->index ];
2572 ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); 2776 ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
2573 2777
2574#ifdef DEBUG 2778#ifdef DEBUG
2779 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2780 ASSERT(args->index < ichdr.count);
2781 ASSERT(args->index >= 0);
2782
2575 if (entry->flags & XFS_ATTR_LOCAL) { 2783 if (entry->flags & XFS_ATTR_LOCAL) {
2576 name_loc = xfs_attr_leaf_name_local(leaf, args->index); 2784 name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
2577 namelen = name_loc->namelen; 2785 namelen = name_loc->namelen;
2578 name = (char *)name_loc->nameval; 2786 name = (char *)name_loc->nameval;
2579 } else { 2787 } else {
2580 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2788 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2581 namelen = name_rmt->namelen; 2789 namelen = name_rmt->namelen;
2582 name = (char *)name_rmt->name; 2790 name = (char *)name_rmt->name;
2583 } 2791 }
@@ -2592,7 +2800,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2592 2800
2593 if (args->rmtblkno) { 2801 if (args->rmtblkno) {
2594 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); 2802 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
2595 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2803 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2596 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2804 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2597 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2805 name_rmt->valuelen = cpu_to_be32(args->valuelen);
2598 xfs_trans_log_buf(args->trans, bp, 2806 xfs_trans_log_buf(args->trans, bp,
@@ -2609,34 +2817,41 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2609 * Set the INCOMPLETE flag on an entry in a leaf block. 2817 * Set the INCOMPLETE flag on an entry in a leaf block.
2610 */ 2818 */
2611int 2819int
2612xfs_attr_leaf_setflag(xfs_da_args_t *args) 2820xfs_attr3_leaf_setflag(
2821 struct xfs_da_args *args)
2613{ 2822{
2614 xfs_attr_leafblock_t *leaf; 2823 struct xfs_attr_leafblock *leaf;
2615 xfs_attr_leaf_entry_t *entry; 2824 struct xfs_attr_leaf_entry *entry;
2616 xfs_attr_leaf_name_remote_t *name_rmt; 2825 struct xfs_attr_leaf_name_remote *name_rmt;
2617 struct xfs_buf *bp; 2826 struct xfs_buf *bp;
2618 int error; 2827 int error;
2828#ifdef DEBUG
2829 struct xfs_attr3_icleaf_hdr ichdr;
2830#endif
2619 2831
2620 trace_xfs_attr_leaf_setflag(args); 2832 trace_xfs_attr_leaf_setflag(args);
2621 2833
2622 /* 2834 /*
2623 * Set up the operation. 2835 * Set up the operation.
2624 */ 2836 */
2625 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 2837 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
2626 if (error) 2838 if (error)
2627 return(error); 2839 return(error);
2628 2840
2629 leaf = bp->b_addr; 2841 leaf = bp->b_addr;
2630 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2842#ifdef DEBUG
2843 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2844 ASSERT(args->index < ichdr.count);
2631 ASSERT(args->index >= 0); 2845 ASSERT(args->index >= 0);
2632 entry = &leaf->entries[ args->index ]; 2846#endif
2847 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
2633 2848
2634 ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); 2849 ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
2635 entry->flags |= XFS_ATTR_INCOMPLETE; 2850 entry->flags |= XFS_ATTR_INCOMPLETE;
2636 xfs_trans_log_buf(args->trans, bp, 2851 xfs_trans_log_buf(args->trans, bp,
2637 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); 2852 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
2638 if ((entry->flags & XFS_ATTR_LOCAL) == 0) { 2853 if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
2639 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2854 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2640 name_rmt->valueblk = 0; 2855 name_rmt->valueblk = 0;
2641 name_rmt->valuelen = 0; 2856 name_rmt->valuelen = 0;
2642 xfs_trans_log_buf(args->trans, bp, 2857 xfs_trans_log_buf(args->trans, bp,
@@ -2657,14 +2872,20 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2657 * Note that they could be in different blocks, or in the same block. 2872 * Note that they could be in different blocks, or in the same block.
2658 */ 2873 */
2659int 2874int
2660xfs_attr_leaf_flipflags(xfs_da_args_t *args) 2875xfs_attr3_leaf_flipflags(
2876 struct xfs_da_args *args)
2661{ 2877{
2662 xfs_attr_leafblock_t *leaf1, *leaf2; 2878 struct xfs_attr_leafblock *leaf1;
2663 xfs_attr_leaf_entry_t *entry1, *entry2; 2879 struct xfs_attr_leafblock *leaf2;
2664 xfs_attr_leaf_name_remote_t *name_rmt; 2880 struct xfs_attr_leaf_entry *entry1;
2665 struct xfs_buf *bp1, *bp2; 2881 struct xfs_attr_leaf_entry *entry2;
2882 struct xfs_attr_leaf_name_remote *name_rmt;
2883 struct xfs_buf *bp1;
2884 struct xfs_buf *bp2;
2666 int error; 2885 int error;
2667#ifdef DEBUG 2886#ifdef DEBUG
2887 struct xfs_attr3_icleaf_hdr ichdr1;
2888 struct xfs_attr3_icleaf_hdr ichdr2;
2668 xfs_attr_leaf_name_local_t *name_loc; 2889 xfs_attr_leaf_name_local_t *name_loc;
2669 int namelen1, namelen2; 2890 int namelen1, namelen2;
2670 char *name1, *name2; 2891 char *name1, *name2;
@@ -2675,7 +2896,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2675 /* 2896 /*
2676 * Read the block containing the "old" attr 2897 * Read the block containing the "old" attr
2677 */ 2898 */
2678 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); 2899 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
2679 if (error) 2900 if (error)
2680 return error; 2901 return error;
2681 2902
@@ -2683,7 +2904,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2683 * Read the block containing the "new" attr, if it is different 2904 * Read the block containing the "new" attr, if it is different
2684 */ 2905 */
2685 if (args->blkno2 != args->blkno) { 2906 if (args->blkno2 != args->blkno) {
2686 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno2, 2907 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
2687 -1, &bp2); 2908 -1, &bp2);
2688 if (error) 2909 if (error)
2689 return error; 2910 return error;
@@ -2692,31 +2913,35 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2692 } 2913 }
2693 2914
2694 leaf1 = bp1->b_addr; 2915 leaf1 = bp1->b_addr;
2695 ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); 2916 entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index];
2696 ASSERT(args->index >= 0);
2697 entry1 = &leaf1->entries[ args->index ];
2698 2917
2699 leaf2 = bp2->b_addr; 2918 leaf2 = bp2->b_addr;
2700 ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); 2919 entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
2701 ASSERT(args->index2 >= 0);
2702 entry2 = &leaf2->entries[ args->index2 ];
2703 2920
2704#ifdef DEBUG 2921#ifdef DEBUG
2922 xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
2923 ASSERT(args->index < ichdr1.count);
2924 ASSERT(args->index >= 0);
2925
2926 xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
2927 ASSERT(args->index2 < ichdr2.count);
2928 ASSERT(args->index2 >= 0);
2929
2705 if (entry1->flags & XFS_ATTR_LOCAL) { 2930 if (entry1->flags & XFS_ATTR_LOCAL) {
2706 name_loc = xfs_attr_leaf_name_local(leaf1, args->index); 2931 name_loc = xfs_attr3_leaf_name_local(leaf1, args->index);
2707 namelen1 = name_loc->namelen; 2932 namelen1 = name_loc->namelen;
2708 name1 = (char *)name_loc->nameval; 2933 name1 = (char *)name_loc->nameval;
2709 } else { 2934 } else {
2710 name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); 2935 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
2711 namelen1 = name_rmt->namelen; 2936 namelen1 = name_rmt->namelen;
2712 name1 = (char *)name_rmt->name; 2937 name1 = (char *)name_rmt->name;
2713 } 2938 }
2714 if (entry2->flags & XFS_ATTR_LOCAL) { 2939 if (entry2->flags & XFS_ATTR_LOCAL) {
2715 name_loc = xfs_attr_leaf_name_local(leaf2, args->index2); 2940 name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2);
2716 namelen2 = name_loc->namelen; 2941 namelen2 = name_loc->namelen;
2717 name2 = (char *)name_loc->nameval; 2942 name2 = (char *)name_loc->nameval;
2718 } else { 2943 } else {
2719 name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); 2944 name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
2720 namelen2 = name_rmt->namelen; 2945 namelen2 = name_rmt->namelen;
2721 name2 = (char *)name_rmt->name; 2946 name2 = (char *)name_rmt->name;
2722 } 2947 }
@@ -2733,7 +2958,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2733 XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); 2958 XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
2734 if (args->rmtblkno) { 2959 if (args->rmtblkno) {
2735 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); 2960 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
2736 name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); 2961 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
2737 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2962 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2738 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2963 name_rmt->valuelen = cpu_to_be32(args->valuelen);
2739 xfs_trans_log_buf(args->trans, bp1, 2964 xfs_trans_log_buf(args->trans, bp1,
@@ -2744,7 +2969,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2744 xfs_trans_log_buf(args->trans, bp2, 2969 xfs_trans_log_buf(args->trans, bp2,
2745 XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); 2970 XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
2746 if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { 2971 if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
2747 name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); 2972 name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
2748 name_rmt->valueblk = 0; 2973 name_rmt->valueblk = 0;
2749 name_rmt->valuelen = 0; 2974 name_rmt->valuelen = 0;
2750 xfs_trans_log_buf(args->trans, bp2, 2975 xfs_trans_log_buf(args->trans, bp2,
@@ -2756,7 +2981,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2756 */ 2981 */
2757 error = xfs_trans_roll(&args->trans, args->dp); 2982 error = xfs_trans_roll(&args->trans, args->dp);
2758 2983
2759 return(error); 2984 return error;
2760} 2985}
2761 2986
2762/*======================================================================== 2987/*========================================================================
@@ -2768,12 +2993,14 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2768 * We're doing a depth-first traversal in order to invalidate everything. 2993 * We're doing a depth-first traversal in order to invalidate everything.
2769 */ 2994 */
2770int 2995int
2771xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) 2996xfs_attr3_root_inactive(
2997 struct xfs_trans **trans,
2998 struct xfs_inode *dp)
2772{ 2999{
2773 xfs_da_blkinfo_t *info; 3000 struct xfs_da_blkinfo *info;
2774 xfs_daddr_t blkno; 3001 struct xfs_buf *bp;
2775 struct xfs_buf *bp; 3002 xfs_daddr_t blkno;
2776 int error; 3003 int error;
2777 3004
2778 /* 3005 /*
2779 * Read block 0 to see what we have to work with. 3006 * Read block 0 to see what we have to work with.
@@ -2781,40 +3008,46 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2781 * the extents in reverse order the extent containing 3008 * the extents in reverse order the extent containing
2782 * block 0 must still be there. 3009 * block 0 must still be there.
2783 */ 3010 */
2784 error = xfs_da_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); 3011 error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
2785 if (error) 3012 if (error)
2786 return(error); 3013 return error;
2787 blkno = XFS_BUF_ADDR(bp); 3014 blkno = bp->b_bn;
2788 3015
2789 /* 3016 /*
2790 * Invalidate the tree, even if the "tree" is only a single leaf block. 3017 * Invalidate the tree, even if the "tree" is only a single leaf block.
2791 * This is a depth-first traversal! 3018 * This is a depth-first traversal!
2792 */ 3019 */
2793 info = bp->b_addr; 3020 info = bp->b_addr;
2794 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { 3021 switch (info->magic) {
2795 error = xfs_attr_node_inactive(trans, dp, bp, 1); 3022 case cpu_to_be16(XFS_DA_NODE_MAGIC):
2796 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { 3023 case cpu_to_be16(XFS_DA3_NODE_MAGIC):
2797 error = xfs_attr_leaf_inactive(trans, dp, bp); 3024 error = xfs_attr3_node_inactive(trans, dp, bp, 1);
2798 } else { 3025 break;
3026 case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
3027 case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
3028 error = xfs_attr3_leaf_inactive(trans, dp, bp);
3029 break;
3030 default:
2799 error = XFS_ERROR(EIO); 3031 error = XFS_ERROR(EIO);
2800 xfs_trans_brelse(*trans, bp); 3032 xfs_trans_brelse(*trans, bp);
3033 break;
2801 } 3034 }
2802 if (error) 3035 if (error)
2803 return(error); 3036 return error;
2804 3037
2805 /* 3038 /*
2806 * Invalidate the incore copy of the root block. 3039 * Invalidate the incore copy of the root block.
2807 */ 3040 */
2808 error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK); 3041 error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
2809 if (error) 3042 if (error)
2810 return(error); 3043 return error;
2811 xfs_trans_binval(*trans, bp); /* remove from cache */ 3044 xfs_trans_binval(*trans, bp); /* remove from cache */
2812 /* 3045 /*
2813 * Commit the invalidate and start the next transaction. 3046 * Commit the invalidate and start the next transaction.
2814 */ 3047 */
2815 error = xfs_trans_roll(trans, dp); 3048 error = xfs_trans_roll(trans, dp);
2816 3049
2817 return (error); 3050 return error;
2818} 3051}
2819 3052
2820/* 3053/*
@@ -2822,7 +3055,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2822 * We're doing a depth-first traversal in order to invalidate everything. 3055 * We're doing a depth-first traversal in order to invalidate everything.
2823 */ 3056 */
2824STATIC int 3057STATIC int
2825xfs_attr_node_inactive( 3058xfs_attr3_node_inactive(
2826 struct xfs_trans **trans, 3059 struct xfs_trans **trans,
2827 struct xfs_inode *dp, 3060 struct xfs_inode *dp,
2828 struct xfs_buf *bp, 3061 struct xfs_buf *bp,
@@ -2832,26 +3065,28 @@ xfs_attr_node_inactive(
2832 xfs_da_intnode_t *node; 3065 xfs_da_intnode_t *node;
2833 xfs_dablk_t child_fsb; 3066 xfs_dablk_t child_fsb;
2834 xfs_daddr_t parent_blkno, child_blkno; 3067 xfs_daddr_t parent_blkno, child_blkno;
2835 int error, count, i; 3068 int error, i;
2836 struct xfs_buf *child_bp; 3069 struct xfs_buf *child_bp;
3070 struct xfs_da_node_entry *btree;
3071 struct xfs_da3_icnode_hdr ichdr;
2837 3072
2838 /* 3073 /*
2839 * Since this code is recursive (gasp!) we must protect ourselves. 3074 * Since this code is recursive (gasp!) we must protect ourselves.
2840 */ 3075 */
2841 if (level > XFS_DA_NODE_MAXDEPTH) { 3076 if (level > XFS_DA_NODE_MAXDEPTH) {
2842 xfs_trans_brelse(*trans, bp); /* no locks for later trans */ 3077 xfs_trans_brelse(*trans, bp); /* no locks for later trans */
2843 return(XFS_ERROR(EIO)); 3078 return XFS_ERROR(EIO);
2844 } 3079 }
2845 3080
2846 node = bp->b_addr; 3081 node = bp->b_addr;
2847 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 3082 xfs_da3_node_hdr_from_disk(&ichdr, node);
2848 parent_blkno = XFS_BUF_ADDR(bp); /* save for re-read later */ 3083 parent_blkno = bp->b_bn;
2849 count = be16_to_cpu(node->hdr.count); 3084 if (!ichdr.count) {
2850 if (!count) {
2851 xfs_trans_brelse(*trans, bp); 3085 xfs_trans_brelse(*trans, bp);
2852 return(0); 3086 return 0;
2853 } 3087 }
2854 child_fsb = be32_to_cpu(node->btree[0].before); 3088 btree = xfs_da3_node_tree_p(node);
3089 child_fsb = be32_to_cpu(btree[0].before);
2855 xfs_trans_brelse(*trans, bp); /* no locks for later trans */ 3090 xfs_trans_brelse(*trans, bp); /* no locks for later trans */
2856 3091
2857 /* 3092 /*
@@ -2859,14 +3094,14 @@ xfs_attr_node_inactive(
2859 * over the leaves removing all of them. If this is higher up 3094 * over the leaves removing all of them. If this is higher up
2860 * in the tree, recurse downward. 3095 * in the tree, recurse downward.
2861 */ 3096 */
2862 for (i = 0; i < count; i++) { 3097 for (i = 0; i < ichdr.count; i++) {
2863 /* 3098 /*
2864 * Read the subsidiary block to see what we have to work with. 3099 * Read the subsidiary block to see what we have to work with.
2865 * Don't do this in a transaction. This is a depth-first 3100 * Don't do this in a transaction. This is a depth-first
2866 * traversal of the tree so we may deal with many blocks 3101 * traversal of the tree so we may deal with many blocks
2867 * before we come back to this one. 3102 * before we come back to this one.
2868 */ 3103 */
2869 error = xfs_da_node_read(*trans, dp, child_fsb, -2, &child_bp, 3104 error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
2870 XFS_ATTR_FORK); 3105 XFS_ATTR_FORK);
2871 if (error) 3106 if (error)
2872 return(error); 3107 return(error);
@@ -2878,18 +3113,24 @@ xfs_attr_node_inactive(
2878 * Invalidate the subtree, however we have to. 3113 * Invalidate the subtree, however we have to.
2879 */ 3114 */
2880 info = child_bp->b_addr; 3115 info = child_bp->b_addr;
2881 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { 3116 switch (info->magic) {
2882 error = xfs_attr_node_inactive(trans, dp, 3117 case cpu_to_be16(XFS_DA_NODE_MAGIC):
2883 child_bp, level+1); 3118 case cpu_to_be16(XFS_DA3_NODE_MAGIC):
2884 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { 3119 error = xfs_attr3_node_inactive(trans, dp,
2885 error = xfs_attr_leaf_inactive(trans, dp, 3120 child_bp, level + 1);
2886 child_bp); 3121 break;
2887 } else { 3122 case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
3123 case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
3124 error = xfs_attr3_leaf_inactive(trans, dp,
3125 child_bp);
3126 break;
3127 default:
2888 error = XFS_ERROR(EIO); 3128 error = XFS_ERROR(EIO);
2889 xfs_trans_brelse(*trans, child_bp); 3129 xfs_trans_brelse(*trans, child_bp);
3130 break;
2890 } 3131 }
2891 if (error) 3132 if (error)
2892 return(error); 3133 return error;
2893 3134
2894 /* 3135 /*
2895 * Remove the subsidiary block from the cache 3136 * Remove the subsidiary block from the cache
@@ -2898,7 +3139,7 @@ xfs_attr_node_inactive(
2898 error = xfs_da_get_buf(*trans, dp, 0, child_blkno, 3139 error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
2899 &child_bp, XFS_ATTR_FORK); 3140 &child_bp, XFS_ATTR_FORK);
2900 if (error) 3141 if (error)
2901 return(error); 3142 return error;
2902 xfs_trans_binval(*trans, child_bp); 3143 xfs_trans_binval(*trans, child_bp);
2903 } 3144 }
2904 3145
@@ -2906,12 +3147,12 @@ xfs_attr_node_inactive(
2906 * If we're not done, re-read the parent to get the next 3147 * If we're not done, re-read the parent to get the next
2907 * child block number. 3148 * child block number.
2908 */ 3149 */
2909 if ((i+1) < count) { 3150 if (i + 1 < ichdr.count) {
2910 error = xfs_da_node_read(*trans, dp, 0, parent_blkno, 3151 error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
2911 &bp, XFS_ATTR_FORK); 3152 &bp, XFS_ATTR_FORK);
2912 if (error) 3153 if (error)
2913 return(error); 3154 return error;
2914 child_fsb = be32_to_cpu(node->btree[i+1].before); 3155 child_fsb = be32_to_cpu(btree[i + 1].before);
2915 xfs_trans_brelse(*trans, bp); 3156 xfs_trans_brelse(*trans, bp);
2916 } 3157 }
2917 /* 3158 /*
@@ -2919,10 +3160,10 @@ xfs_attr_node_inactive(
2919 */ 3160 */
2920 error = xfs_trans_roll(trans, dp); 3161 error = xfs_trans_roll(trans, dp);
2921 if (error) 3162 if (error)
2922 return (error); 3163 return error;
2923 } 3164 }
2924 3165
2925 return(0); 3166 return 0;
2926} 3167}
2927 3168
2928/* 3169/*
@@ -2932,29 +3173,35 @@ xfs_attr_node_inactive(
2932 * caught holding something that the logging code wants to flush to disk. 3173 * caught holding something that the logging code wants to flush to disk.
2933 */ 3174 */
2934STATIC int 3175STATIC int
2935xfs_attr_leaf_inactive( 3176xfs_attr3_leaf_inactive(
2936 struct xfs_trans **trans, 3177 struct xfs_trans **trans,
2937 struct xfs_inode *dp, 3178 struct xfs_inode *dp,
2938 struct xfs_buf *bp) 3179 struct xfs_buf *bp)
2939{ 3180{
2940 xfs_attr_leafblock_t *leaf; 3181 struct xfs_attr_leafblock *leaf;
2941 xfs_attr_leaf_entry_t *entry; 3182 struct xfs_attr3_icleaf_hdr ichdr;
2942 xfs_attr_leaf_name_remote_t *name_rmt; 3183 struct xfs_attr_leaf_entry *entry;
2943 xfs_attr_inactive_list_t *list, *lp; 3184 struct xfs_attr_leaf_name_remote *name_rmt;
2944 int error, count, size, tmp, i; 3185 struct xfs_attr_inactive_list *list;
3186 struct xfs_attr_inactive_list *lp;
3187 int error;
3188 int count;
3189 int size;
3190 int tmp;
3191 int i;
2945 3192
2946 leaf = bp->b_addr; 3193 leaf = bp->b_addr;
2947 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 3194 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2948 3195
2949 /* 3196 /*
2950 * Count the number of "remote" value extents. 3197 * Count the number of "remote" value extents.
2951 */ 3198 */
2952 count = 0; 3199 count = 0;
2953 entry = &leaf->entries[0]; 3200 entry = xfs_attr3_leaf_entryp(leaf);
2954 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 3201 for (i = 0; i < ichdr.count; entry++, i++) {
2955 if (be16_to_cpu(entry->nameidx) && 3202 if (be16_to_cpu(entry->nameidx) &&
2956 ((entry->flags & XFS_ATTR_LOCAL) == 0)) { 3203 ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
2957 name_rmt = xfs_attr_leaf_name_remote(leaf, i); 3204 name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
2958 if (name_rmt->valueblk) 3205 if (name_rmt->valueblk)
2959 count++; 3206 count++;
2960 } 3207 }
@@ -2965,24 +3212,24 @@ xfs_attr_leaf_inactive(
2965 */ 3212 */
2966 if (count == 0) { 3213 if (count == 0) {
2967 xfs_trans_brelse(*trans, bp); 3214 xfs_trans_brelse(*trans, bp);
2968 return(0); 3215 return 0;
2969 } 3216 }
2970 3217
2971 /* 3218 /*
2972 * Allocate storage for a list of all the "remote" value extents. 3219 * Allocate storage for a list of all the "remote" value extents.
2973 */ 3220 */
2974 size = count * sizeof(xfs_attr_inactive_list_t); 3221 size = count * sizeof(xfs_attr_inactive_list_t);
2975 list = (xfs_attr_inactive_list_t *)kmem_alloc(size, KM_SLEEP); 3222 list = kmem_alloc(size, KM_SLEEP);
2976 3223
2977 /* 3224 /*
2978 * Identify each of the "remote" value extents. 3225 * Identify each of the "remote" value extents.
2979 */ 3226 */
2980 lp = list; 3227 lp = list;
2981 entry = &leaf->entries[0]; 3228 entry = xfs_attr3_leaf_entryp(leaf);
2982 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 3229 for (i = 0; i < ichdr.count; entry++, i++) {
2983 if (be16_to_cpu(entry->nameidx) && 3230 if (be16_to_cpu(entry->nameidx) &&
2984 ((entry->flags & XFS_ATTR_LOCAL) == 0)) { 3231 ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
2985 name_rmt = xfs_attr_leaf_name_remote(leaf, i); 3232 name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
2986 if (name_rmt->valueblk) { 3233 if (name_rmt->valueblk) {
2987 lp->valueblk = be32_to_cpu(name_rmt->valueblk); 3234 lp->valueblk = be32_to_cpu(name_rmt->valueblk);
2988 lp->valuelen = XFS_B_TO_FSB(dp->i_mount, 3235 lp->valuelen = XFS_B_TO_FSB(dp->i_mount,
@@ -2998,15 +3245,15 @@ xfs_attr_leaf_inactive(
2998 */ 3245 */
2999 error = 0; 3246 error = 0;
3000 for (lp = list, i = 0; i < count; i++, lp++) { 3247 for (lp = list, i = 0; i < count; i++, lp++) {
3001 tmp = xfs_attr_leaf_freextent(trans, dp, 3248 tmp = xfs_attr3_leaf_freextent(trans, dp,
3002 lp->valueblk, lp->valuelen); 3249 lp->valueblk, lp->valuelen);
3003 3250
3004 if (error == 0) 3251 if (error == 0)
3005 error = tmp; /* save only the 1st errno */ 3252 error = tmp; /* save only the 1st errno */
3006 } 3253 }
3007 3254
3008 kmem_free((xfs_caddr_t)list); 3255 kmem_free(list);
3009 return(error); 3256 return error;
3010} 3257}
3011 3258
3012/* 3259/*
@@ -3014,14 +3261,20 @@ xfs_attr_leaf_inactive(
3014 * invalidate any buffers that are incore/in transactions. 3261 * invalidate any buffers that are incore/in transactions.
3015 */ 3262 */
3016STATIC int 3263STATIC int
3017xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, 3264xfs_attr3_leaf_freextent(
3018 xfs_dablk_t blkno, int blkcnt) 3265 struct xfs_trans **trans,
3266 struct xfs_inode *dp,
3267 xfs_dablk_t blkno,
3268 int blkcnt)
3019{ 3269{
3020 xfs_bmbt_irec_t map; 3270 struct xfs_bmbt_irec map;
3021 xfs_dablk_t tblkno; 3271 struct xfs_buf *bp;
3022 int tblkcnt, dblkcnt, nmap, error; 3272 xfs_dablk_t tblkno;
3023 xfs_daddr_t dblkno; 3273 xfs_daddr_t dblkno;
3024 xfs_buf_t *bp; 3274 int tblkcnt;
3275 int dblkcnt;
3276 int nmap;
3277 int error;
3025 3278
3026 /* 3279 /*
3027 * Roll through the "value", invalidating the attribute value's 3280 * Roll through the "value", invalidating the attribute value's
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 77de139a58f0..f9d7846097e2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -89,7 +90,7 @@ typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
89 90
90typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ 91typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
91 __be32 hashval; /* hash value of name */ 92 __be32 hashval; /* hash value of name */
92 __be16 nameidx; /* index into buffer of name/value */ 93 __be16 nameidx; /* index into buffer of name/value */
93 __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ 94 __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
94 __u8 pad2; /* unused pad byte */ 95 __u8 pad2; /* unused pad byte */
95} xfs_attr_leaf_entry_t; 96} xfs_attr_leaf_entry_t;
@@ -115,6 +116,54 @@ typedef struct xfs_attr_leafblock {
115} xfs_attr_leafblock_t; 116} xfs_attr_leafblock_t;
116 117
117/* 118/*
119 * CRC enabled leaf structures. Called "version 3" structures to match the
120 * version number of the directory and dablk structures for this feature, and
121 * attr2 is already taken by the variable inode attribute fork size feature.
122 */
123struct xfs_attr3_leaf_hdr {
124 struct xfs_da3_blkinfo info;
125 __be16 count;
126 __be16 usedbytes;
127 __be16 firstused;
128 __u8 holes;
129 __u8 pad1;
130 struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
131};
132
133#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
134
135struct xfs_attr3_leafblock {
136 struct xfs_attr3_leaf_hdr hdr;
137 struct xfs_attr_leaf_entry entries[1];
138
139 /*
140 * The rest of the block contains the following structures after the
141 * leaf entries, growing from the bottom up. The variables are never
142 * referenced, the locations accessed purely from helper functions.
143 *
144 * struct xfs_attr_leaf_name_local
145 * struct xfs_attr_leaf_name_remote
146 */
147};
148
149/*
150 * incore, neutral version of the attribute leaf header
151 */
152struct xfs_attr3_icleaf_hdr {
153 __uint32_t forw;
154 __uint32_t back;
155 __uint16_t magic;
156 __uint16_t count;
157 __uint16_t usedbytes;
158 __uint16_t firstused;
159 __u8 holes;
160 struct {
161 __uint16_t base;
162 __uint16_t size;
163 } freemap[XFS_ATTR_LEAF_MAPSIZE];
164};
165
166/*
118 * Flags used in the leaf_entry[i].flags field. 167 * Flags used in the leaf_entry[i].flags field.
119 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified 168 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
120 * on the system call, they are "or"ed together for various operations. 169 * on the system call, they are "or"ed together for various operations.
@@ -147,26 +196,43 @@ typedef struct xfs_attr_leafblock {
147 */ 196 */
148#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) 197#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
149 198
199static inline int
200xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
201{
202 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
203 return sizeof(struct xfs_attr3_leaf_hdr);
204 return sizeof(struct xfs_attr_leaf_hdr);
205}
206
207static inline struct xfs_attr_leaf_entry *
208xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
209{
210 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
211 return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
212 return &leafp->entries[0];
213}
214
150/* 215/*
151 * Cast typed pointers for "local" and "remote" name/value structs. 216 * Cast typed pointers for "local" and "remote" name/value structs.
152 */ 217 */
153static inline xfs_attr_leaf_name_remote_t * 218static inline char *
154xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) 219xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
155{ 220{
156 return (xfs_attr_leaf_name_remote_t *) 221 struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
157 &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; 222
223 return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
158} 224}
159 225
160static inline xfs_attr_leaf_name_local_t * 226static inline xfs_attr_leaf_name_remote_t *
161xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) 227xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
162{ 228{
163 return (xfs_attr_leaf_name_local_t *) 229 return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
164 &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
165} 230}
166 231
167static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) 232static inline xfs_attr_leaf_name_local_t *
233xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
168{ 234{
169 return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; 235 return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
170} 236}
171 237
172/* 238/*
@@ -221,37 +287,37 @@ int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
221/* 287/*
222 * Internal routines when attribute fork size == XFS_LBSIZE(mp). 288 * Internal routines when attribute fork size == XFS_LBSIZE(mp).
223 */ 289 */
224int xfs_attr_leaf_to_node(struct xfs_da_args *args); 290int xfs_attr3_leaf_to_node(struct xfs_da_args *args);
225int xfs_attr_leaf_to_shortform(struct xfs_buf *bp, 291int xfs_attr3_leaf_to_shortform(struct xfs_buf *bp,
226 struct xfs_da_args *args, int forkoff); 292 struct xfs_da_args *args, int forkoff);
227int xfs_attr_leaf_clearflag(struct xfs_da_args *args); 293int xfs_attr3_leaf_clearflag(struct xfs_da_args *args);
228int xfs_attr_leaf_setflag(struct xfs_da_args *args); 294int xfs_attr3_leaf_setflag(struct xfs_da_args *args);
229int xfs_attr_leaf_flipflags(xfs_da_args_t *args); 295int xfs_attr3_leaf_flipflags(struct xfs_da_args *args);
230 296
231/* 297/*
232 * Routines used for growing the Btree. 298 * Routines used for growing the Btree.
233 */ 299 */
234int xfs_attr_leaf_split(struct xfs_da_state *state, 300int xfs_attr3_leaf_split(struct xfs_da_state *state,
235 struct xfs_da_state_blk *oldblk, 301 struct xfs_da_state_blk *oldblk,
236 struct xfs_da_state_blk *newblk); 302 struct xfs_da_state_blk *newblk);
237int xfs_attr_leaf_lookup_int(struct xfs_buf *leaf, 303int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
238 struct xfs_da_args *args); 304 struct xfs_da_args *args);
239int xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); 305int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
240int xfs_attr_leaf_add(struct xfs_buf *leaf_buffer, 306int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
241 struct xfs_da_args *args); 307 struct xfs_da_args *args);
242int xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer, 308int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
243 struct xfs_da_args *args); 309 struct xfs_da_args *args);
244int xfs_attr_leaf_list_int(struct xfs_buf *bp, 310int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
245 struct xfs_attr_list_context *context); 311 struct xfs_attr_list_context *context);
246 312
247/* 313/*
248 * Routines used for shrinking the Btree. 314 * Routines used for shrinking the Btree.
249 */ 315 */
250int xfs_attr_leaf_toosmall(struct xfs_da_state *state, int *retval); 316int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
251void xfs_attr_leaf_unbalance(struct xfs_da_state *state, 317void xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
252 struct xfs_da_state_blk *drop_blk, 318 struct xfs_da_state_blk *drop_blk,
253 struct xfs_da_state_blk *save_blk); 319 struct xfs_da_state_blk *save_blk);
254int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); 320int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
255 321
256/* 322/*
257 * Utility routines. 323 * Utility routines.
@@ -261,10 +327,12 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
261 struct xfs_buf *leaf2_bp); 327 struct xfs_buf *leaf2_bp);
262int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, 328int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
263 int *local); 329 int *local);
264int xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, 330int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
265 xfs_dablk_t bno, xfs_daddr_t mappedbno, 331 xfs_dablk_t bno, xfs_daddr_t mappedbno,
266 struct xfs_buf **bpp); 332 struct xfs_buf **bpp);
333void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
334 struct xfs_attr_leafblock *from);
267 335
268extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops; 336extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
269 337
270#endif /* __XFS_ATTR_LEAF_H__ */ 338#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
new file mode 100644
index 000000000000..dee84466dcc9
--- /dev/null
+++ b/fs/xfs/xfs_attr_remote.c
@@ -0,0 +1,541 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_types.h"
22#include "xfs_bit.h"
23#include "xfs_log.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28#include "xfs_error.h"
29#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_dinode.h"
32#include "xfs_inode.h"
33#include "xfs_alloc.h"
34#include "xfs_inode_item.h"
35#include "xfs_bmap.h"
36#include "xfs_attr.h"
37#include "xfs_attr_leaf.h"
38#include "xfs_attr_remote.h"
39#include "xfs_trans_space.h"
40#include "xfs_trace.h"
41#include "xfs_cksum.h"
42#include "xfs_buf_item.h"
43
44#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
45
46/*
47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion.
49 */
50static int
51xfs_attr3_rmt_blocks(
52 struct xfs_mount *mp,
53 int attrlen)
54{
55 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp,
56 mp->m_sb.sb_blocksize);
57 return (attrlen + buflen - 1) / buflen;
58}
59
60static bool
61xfs_attr3_rmt_verify(
62 struct xfs_buf *bp)
63{
64 struct xfs_mount *mp = bp->b_target->bt_mount;
65 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
66
67 if (!xfs_sb_version_hascrc(&mp->m_sb))
68 return false;
69 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
70 return false;
71 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
72 return false;
73 if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
74 return false;
75 if (be32_to_cpu(rmt->rm_offset) +
76 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
77 return false;
78 if (rmt->rm_owner == 0)
79 return false;
80
81 return true;
82}
83
84static void
85xfs_attr3_rmt_read_verify(
86 struct xfs_buf *bp)
87{
88 struct xfs_mount *mp = bp->b_target->bt_mount;
89
90 /* no verification of non-crc buffers */
91 if (!xfs_sb_version_hascrc(&mp->m_sb))
92 return;
93
94 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
95 XFS_ATTR3_RMT_CRC_OFF) ||
96 !xfs_attr3_rmt_verify(bp)) {
97 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
98 xfs_buf_ioerror(bp, EFSCORRUPTED);
99 }
100}
101
102static void
103xfs_attr3_rmt_write_verify(
104 struct xfs_buf *bp)
105{
106 struct xfs_mount *mp = bp->b_target->bt_mount;
107 struct xfs_buf_log_item *bip = bp->b_fspriv;
108
109 /* no verification of non-crc buffers */
110 if (!xfs_sb_version_hascrc(&mp->m_sb))
111 return;
112
113 if (!xfs_attr3_rmt_verify(bp)) {
114 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
115 xfs_buf_ioerror(bp, EFSCORRUPTED);
116 return;
117 }
118
119 if (bip) {
120 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
121 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
122 }
123 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
124 XFS_ATTR3_RMT_CRC_OFF);
125}
126
127const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
128 .verify_read = xfs_attr3_rmt_read_verify,
129 .verify_write = xfs_attr3_rmt_write_verify,
130};
131
132static int
133xfs_attr3_rmt_hdr_set(
134 struct xfs_mount *mp,
135 xfs_ino_t ino,
136 uint32_t offset,
137 uint32_t size,
138 struct xfs_buf *bp)
139{
140 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
141
142 if (!xfs_sb_version_hascrc(&mp->m_sb))
143 return 0;
144
145 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
146 rmt->rm_offset = cpu_to_be32(offset);
147 rmt->rm_bytes = cpu_to_be32(size);
148 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
149 rmt->rm_owner = cpu_to_be64(ino);
150 rmt->rm_blkno = cpu_to_be64(bp->b_bn);
151 bp->b_ops = &xfs_attr3_rmt_buf_ops;
152
153 return sizeof(struct xfs_attr3_rmt_hdr);
154}
155
156/*
157 * Checking of the remote attribute header is split into two parts. the verifier
158 * does CRC, location and bounds checking, the unpacking function checks the
159 * attribute parameters and owner.
160 */
161static bool
162xfs_attr3_rmt_hdr_ok(
163 struct xfs_mount *mp,
164 xfs_ino_t ino,
165 uint32_t offset,
166 uint32_t size,
167 struct xfs_buf *bp)
168{
169 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
170
171 if (offset != be32_to_cpu(rmt->rm_offset))
172 return false;
173 if (size != be32_to_cpu(rmt->rm_bytes))
174 return false;
175 if (ino != be64_to_cpu(rmt->rm_owner))
176 return false;
177
178 /* ok */
179 return true;
180}
181
182/*
183 * Read the value associated with an attribute from the out-of-line buffer
184 * that we stored it in.
185 */
186int
187xfs_attr_rmtval_get(
188 struct xfs_da_args *args)
189{
190 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
191 struct xfs_mount *mp = args->dp->i_mount;
192 struct xfs_buf *bp;
193 xfs_daddr_t dblkno;
194 xfs_dablk_t lblkno = args->rmtblkno;
195 void *dst = args->value;
196 int valuelen = args->valuelen;
197 int nmap;
198 int error;
199 int blkcnt;
200 int i;
201 int offset = 0;
202
203 trace_xfs_attr_rmtval_get(args);
204
205 ASSERT(!(args->flags & ATTR_KERNOVAL));
206
207 while (valuelen > 0) {
208 nmap = ATTR_RMTVALUE_MAPSIZE;
209 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
210 args->rmtblkcnt, map, &nmap,
211 XFS_BMAPI_ATTRFORK);
212 if (error)
213 return error;
214 ASSERT(nmap >= 1);
215
216 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
217 int byte_cnt;
218 char *src;
219
220 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
221 (map[i].br_startblock != HOLESTARTBLOCK));
222 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
223 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
224 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
225 dblkno, blkcnt, 0, &bp,
226 &xfs_attr3_rmt_buf_ops);
227 if (error)
228 return error;
229
230 byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length));
231 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
232
233 src = bp->b_addr;
234 if (xfs_sb_version_hascrc(&mp->m_sb)) {
235 if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
236 offset, byte_cnt, bp)) {
237 xfs_alert(mp,
238"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
239 offset, byte_cnt, args->dp->i_ino);
240 xfs_buf_relse(bp);
241 return EFSCORRUPTED;
242
243 }
244
245 src += sizeof(struct xfs_attr3_rmt_hdr);
246 }
247
248 memcpy(dst, src, byte_cnt);
249 xfs_buf_relse(bp);
250
251 offset += byte_cnt;
252 dst += byte_cnt;
253 valuelen -= byte_cnt;
254
255 lblkno += map[i].br_blockcount;
256 }
257 }
258 ASSERT(valuelen == 0);
259 return 0;
260}
261
262/*
263 * Write the value associated with an attribute into the out-of-line buffer
264 * that we have defined for it.
265 */
266int
267xfs_attr_rmtval_set(
268 struct xfs_da_args *args)
269{
270 struct xfs_inode *dp = args->dp;
271 struct xfs_mount *mp = dp->i_mount;
272 struct xfs_bmbt_irec map;
273 struct xfs_buf *bp;
274 xfs_daddr_t dblkno;
275 xfs_dablk_t lblkno;
276 xfs_fileoff_t lfileoff = 0;
277 void *src = args->value;
278 int blkcnt;
279 int valuelen;
280 int nmap;
281 int error;
282 int hdrcnt = 0;
283 bool crcs = xfs_sb_version_hascrc(&mp->m_sb);
284 int offset = 0;
285
286 trace_xfs_attr_rmtval_set(args);
287
288 /*
289 * Find a "hole" in the attribute address space large enough for
290 * us to drop the new attribute's value into. Because CRC enable
291 * attributes have headers, we can't just do a straight byte to FSB
292 * conversion. We calculate the worst case block count in this case
293 * and we may not need that many, so we have to handle this when
294 * allocating the blocks below.
295 */
296 if (!crcs)
297 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
298 else
299 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
300
301 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
302 XFS_ATTR_FORK);
303 if (error)
304 return error;
305
306 /* Start with the attribute data. We'll allocate the rest afterwards. */
307 if (crcs)
308 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
309
310 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
311 args->rmtblkcnt = blkcnt;
312
313 /*
314 * Roll through the "value", allocating blocks on disk as required.
315 */
316 while (blkcnt > 0) {
317 int committed;
318
319 /*
320 * Allocate a single extent, up to the size of the value.
321 */
322 xfs_bmap_init(args->flist, args->firstblock);
323 nmap = 1;
324 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
325 blkcnt,
326 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
327 args->firstblock, args->total, &map, &nmap,
328 args->flist);
329 if (!error) {
330 error = xfs_bmap_finish(&args->trans, args->flist,
331 &committed);
332 }
333 if (error) {
334 ASSERT(committed);
335 args->trans = NULL;
336 xfs_bmap_cancel(args->flist);
337 return(error);
338 }
339
340 /*
341 * bmap_finish() may have committed the last trans and started
342 * a new one. We need the inode to be in all transactions.
343 */
344 if (committed)
345 xfs_trans_ijoin(args->trans, dp, 0);
346
347 ASSERT(nmap == 1);
348 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
349 (map.br_startblock != HOLESTARTBLOCK));
350 lblkno += map.br_blockcount;
351 blkcnt -= map.br_blockcount;
352 hdrcnt++;
353
354 /*
355 * If we have enough blocks for the attribute data, calculate
356 * how many extra blocks we need for headers. We might run
357 * through this multiple times in the case that the additional
358 * headers in the blocks needed for the data fragments spills
359 * into requiring more blocks. e.g. for 512 byte blocks, we'll
360 * spill for another block every 9 headers we require in this
361 * loop.
362 */
363 if (crcs && blkcnt == 0) {
364 int total_len;
365
366 total_len = args->valuelen +
367 hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
368 blkcnt = XFS_B_TO_FSB(mp, total_len);
369 blkcnt -= args->rmtblkcnt;
370 args->rmtblkcnt += blkcnt;
371 }
372
373 /*
374 * Start the next trans in the chain.
375 */
376 error = xfs_trans_roll(&args->trans, dp);
377 if (error)
378 return (error);
379 }
380
381 /*
382 * Roll through the "value", copying the attribute value to the
383 * already-allocated blocks. Blocks are written synchronously
384 * so that we can know they are all on disk before we turn off
385 * the INCOMPLETE flag.
386 */
387 lblkno = args->rmtblkno;
388 valuelen = args->valuelen;
389 while (valuelen > 0) {
390 int byte_cnt;
391 char *buf;
392
393 /*
394 * Try to remember where we decided to put the value.
395 */
396 xfs_bmap_init(args->flist, args->firstblock);
397 nmap = 1;
398 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
399 args->rmtblkcnt, &map, &nmap,
400 XFS_BMAPI_ATTRFORK);
401 if (error)
402 return(error);
403 ASSERT(nmap == 1);
404 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
405 (map.br_startblock != HOLESTARTBLOCK));
406
407 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
408 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
409
410 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
411 if (!bp)
412 return ENOMEM;
413 bp->b_ops = &xfs_attr3_rmt_buf_ops;
414
415 byte_cnt = BBTOB(bp->b_length);
416 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
417 if (valuelen < byte_cnt)
418 byte_cnt = valuelen;
419
420 buf = bp->b_addr;
421 buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
422 byte_cnt, bp);
423 memcpy(buf, src, byte_cnt);
424
425 if (byte_cnt < BBTOB(bp->b_length))
426 xfs_buf_zero(bp, byte_cnt,
427 BBTOB(bp->b_length) - byte_cnt);
428
429 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
430 xfs_buf_relse(bp);
431 if (error)
432 return error;
433
434 src += byte_cnt;
435 valuelen -= byte_cnt;
436 offset += byte_cnt;
437 hdrcnt--;
438
439 lblkno += map.br_blockcount;
440 }
441 ASSERT(valuelen == 0);
442 ASSERT(hdrcnt == 0);
443 return 0;
444}
445
446/*
447 * Remove the value associated with an attribute by deleting the
448 * out-of-line buffer that it is stored on.
449 */
450int
451xfs_attr_rmtval_remove(xfs_da_args_t *args)
452{
453 xfs_mount_t *mp;
454 xfs_bmbt_irec_t map;
455 xfs_buf_t *bp;
456 xfs_daddr_t dblkno;
457 xfs_dablk_t lblkno;
458 int valuelen, blkcnt, nmap, error, done, committed;
459
460 trace_xfs_attr_rmtval_remove(args);
461
462 mp = args->dp->i_mount;
463
464 /*
465 * Roll through the "value", invalidating the attribute value's
466 * blocks.
467 */
468 lblkno = args->rmtblkno;
469 valuelen = args->rmtblkcnt;
470 while (valuelen > 0) {
471 /*
472 * Try to remember where we decided to put the value.
473 */
474 nmap = 1;
475 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
476 args->rmtblkcnt, &map, &nmap,
477 XFS_BMAPI_ATTRFORK);
478 if (error)
479 return(error);
480 ASSERT(nmap == 1);
481 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
482 (map.br_startblock != HOLESTARTBLOCK));
483
484 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
485 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
486
487 /*
488 * If the "remote" value is in the cache, remove it.
489 */
490 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
491 if (bp) {
492 xfs_buf_stale(bp);
493 xfs_buf_relse(bp);
494 bp = NULL;
495 }
496
497 valuelen -= map.br_blockcount;
498
499 lblkno += map.br_blockcount;
500 }
501
502 /*
503 * Keep de-allocating extents until the remote-value region is gone.
504 */
505 lblkno = args->rmtblkno;
506 blkcnt = args->rmtblkcnt;
507 done = 0;
508 while (!done) {
509 xfs_bmap_init(args->flist, args->firstblock);
510 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
511 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
512 1, args->firstblock, args->flist,
513 &done);
514 if (!error) {
515 error = xfs_bmap_finish(&args->trans, args->flist,
516 &committed);
517 }
518 if (error) {
519 ASSERT(committed);
520 args->trans = NULL;
521 xfs_bmap_cancel(args->flist);
522 return error;
523 }
524
525 /*
526 * bmap_finish() may have committed the last trans and started
527 * a new one. We need the inode to be in all transactions.
528 */
529 if (committed)
530 xfs_trans_ijoin(args->trans, args->dp, 0);
531
532 /*
533 * Close out trans and start the next one in the chain.
534 */
535 error = xfs_trans_roll(&args->trans, args->dp);
536 if (error)
537 return (error);
538 }
539 return(0);
540}
541
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
new file mode 100644
index 000000000000..c7cca60a062a
--- /dev/null
+++ b/fs/xfs/xfs_attr_remote.h
@@ -0,0 +1,46 @@
1/*
2 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_ATTR_REMOTE_H__
19#define __XFS_ATTR_REMOTE_H__
20
21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
22
/*
 * Header prefixed to each remote attribute value block.  Per
 * XFS_ATTR3_RMT_BUF_SPACE below, this space is only reserved when
 * xfs_sb_version_hascrc() is true (v5 filesystems).
 */
struct xfs_attr3_rmt_hdr {
	__be32	rm_magic;	/* XFS_ATTR3_RMT_MAGIC ("XARM") */
	__be32	rm_offset;	/* presumably byte offset of this block's
				 * data within the whole attr value —
				 * verify against xfs_attr_remote.c */
	__be32	rm_bytes;	/* presumably bytes of value data carried
				 * in this block — verify against users */
	__be32	rm_crc;		/* checksum; located via XFS_ATTR3_RMT_CRC_OFF */
	uuid_t	rm_uuid;	/* filesystem UUID */
	__be64	rm_owner;	/* NOTE(review): looks like owning inode
				 * number — confirm in the verifier code */
	__be64	rm_blkno;	/* NOTE(review): looks like disk address of
				 * this block — confirm in the verifier code */
	__be64	rm_lsn;		/* NOTE(review): looks like LSN of the last
				 * write — confirm in the verifier code */
};
33
34#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
35
36#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
37 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
38 sizeof(struct xfs_attr3_rmt_hdr) : 0))
39
40extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
41
42int xfs_attr_rmtval_get(struct xfs_da_args *args);
43int xfs_attr_rmtval_set(struct xfs_da_args *args);
44int xfs_attr_rmtval_remove(struct xfs_da_args *args);
45
46#endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b44af9211bd9..89042848f9ec 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -25,6 +25,7 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_mount.h"
28#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
@@ -47,180 +48,78 @@
47#include "xfs_filestream.h" 48#include "xfs_filestream.h"
48#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
49#include "xfs_trace.h" 50#include "xfs_trace.h"
51#include "xfs_symlink.h"
50 52
51 53
52kmem_zone_t *xfs_bmap_free_item_zone; 54kmem_zone_t *xfs_bmap_free_item_zone;
53 55
54/* 56/*
55 * Prototypes for internal bmap routines. 57 * Miscellaneous helper functions
56 */
57
58#ifdef DEBUG
59STATIC void
60xfs_bmap_check_leaf_extents(
61 struct xfs_btree_cur *cur,
62 struct xfs_inode *ip,
63 int whichfork);
64#else
65#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
66#endif
67
68
69/*
70 * Called from xfs_bmap_add_attrfork to handle extents format files.
71 */
72STATIC int /* error */
73xfs_bmap_add_attrfork_extents(
74 xfs_trans_t *tp, /* transaction pointer */
75 xfs_inode_t *ip, /* incore inode pointer */
76 xfs_fsblock_t *firstblock, /* first block allocated */
77 xfs_bmap_free_t *flist, /* blocks to free at commit */
78 int *flags); /* inode logging flags */
79
80/*
81 * Called from xfs_bmap_add_attrfork to handle local format files.
82 */ 58 */
83STATIC int /* error */
84xfs_bmap_add_attrfork_local(
85 xfs_trans_t *tp, /* transaction pointer */
86 xfs_inode_t *ip, /* incore inode pointer */
87 xfs_fsblock_t *firstblock, /* first block allocated */
88 xfs_bmap_free_t *flist, /* blocks to free at commit */
89 int *flags); /* inode logging flags */
90 59
91/* 60/*
92 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 61 * Compute and fill in the value of the maximum depth of a bmap btree
93 * It figures out where to ask the underlying allocator to put the new extent. 62 * in this filesystem. Done once, during mount.
94 */
95STATIC int /* error */
96xfs_bmap_alloc(
97 xfs_bmalloca_t *ap); /* bmap alloc argument struct */
98
99/*
100 * Transform a btree format file with only one leaf node, where the
101 * extents list will fit in the inode, into an extents format file.
102 * Since the file extents are already in-core, all we have to do is
103 * give up the space for the btree root and pitch the leaf block.
104 */
105STATIC int /* error */
106xfs_bmap_btree_to_extents(
107 xfs_trans_t *tp, /* transaction pointer */
108 xfs_inode_t *ip, /* incore inode pointer */
109 xfs_btree_cur_t *cur, /* btree cursor */
110 int *logflagsp, /* inode logging flags */
111 int whichfork); /* data or attr fork */
112
113/*
114 * Remove the entry "free" from the free item list. Prev points to the
115 * previous entry, unless "free" is the head of the list.
116 */
117STATIC void
118xfs_bmap_del_free(
119 xfs_bmap_free_t *flist, /* free item list header */
120 xfs_bmap_free_item_t *prev, /* previous item on list, if any */
121 xfs_bmap_free_item_t *free); /* list item to be freed */
122
123/*
124 * Convert an extents-format file into a btree-format file.
125 * The new file will have a root block (in the inode) and a single child block.
126 */
127STATIC int /* error */
128xfs_bmap_extents_to_btree(
129 xfs_trans_t *tp, /* transaction pointer */
130 xfs_inode_t *ip, /* incore inode pointer */
131 xfs_fsblock_t *firstblock, /* first-block-allocated */
132 xfs_bmap_free_t *flist, /* blocks freed in xaction */
133 xfs_btree_cur_t **curp, /* cursor returned to caller */
134 int wasdel, /* converting a delayed alloc */
135 int *logflagsp, /* inode logging flags */
136 int whichfork); /* data or attr fork */
137
138/*
139 * Convert a local file to an extents file.
140 * This code is sort of bogus, since the file data needs to get
141 * logged so it won't be lost. The bmap-level manipulations are ok, though.
142 */
143STATIC int /* error */
144xfs_bmap_local_to_extents(
145 xfs_trans_t *tp, /* transaction pointer */
146 xfs_inode_t *ip, /* incore inode pointer */
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */
150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
154
155/*
156 * Search the extents list for the inode, for the extent containing bno.
157 * If bno lies in a hole, point to the next entry. If bno lies past eof,
158 * *eofp will be set, and *prevp will contain the last entry (null if none).
159 * Else, *lastxp will be set to the index of the found
160 * entry; *gotp will contain the entry.
161 */
162STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
163xfs_bmap_search_extents(
164 xfs_inode_t *ip, /* incore inode pointer */
165 xfs_fileoff_t bno, /* block number searched for */
166 int whichfork, /* data or attr fork */
167 int *eofp, /* out: end of file found */
168 xfs_extnum_t *lastxp, /* out: last extent index */
169 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
170 xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */
171
172/*
173 * Compute the worst-case number of indirect blocks that will be used
174 * for ip's delayed extent of length "len".
175 */
176STATIC xfs_filblks_t
177xfs_bmap_worst_indlen(
178 xfs_inode_t *ip, /* incore inode pointer */
179 xfs_filblks_t len); /* delayed extent length */
180
181#ifdef DEBUG
182/*
183 * Perform various validation checks on the values being returned
184 * from xfs_bmapi().
185 */ 63 */
186STATIC void 64void
187xfs_bmap_validate_ret( 65xfs_bmap_compute_maxlevels(
188 xfs_fileoff_t bno, 66 xfs_mount_t *mp, /* file system mount structure */
189 xfs_filblks_t len, 67 int whichfork) /* data or attr fork */
190 int flags, 68{
191 xfs_bmbt_irec_t *mval, 69 int level; /* btree level */
192 int nmap, 70 uint maxblocks; /* max blocks at this level */
193 int ret_nmap); 71 uint maxleafents; /* max leaf entries possible */
194#else 72 int maxrootrecs; /* max records in root block */
195#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) 73 int minleafrecs; /* min records in leaf block */
196#endif /* DEBUG */ 74 int minnoderecs; /* min records in node block */
197 75 int sz; /* root block size */
198STATIC int
199xfs_bmap_count_tree(
200 xfs_mount_t *mp,
201 xfs_trans_t *tp,
202 xfs_ifork_t *ifp,
203 xfs_fsblock_t blockno,
204 int levelin,
205 int *count);
206
207STATIC void
208xfs_bmap_count_leaves(
209 xfs_ifork_t *ifp,
210 xfs_extnum_t idx,
211 int numrecs,
212 int *count);
213 76
214STATIC void 77 /*
215xfs_bmap_disk_count_leaves( 78 * The maximum number of extents in a file, hence the maximum
216 struct xfs_mount *mp, 79 * number of leaf entries, is controlled by the type of di_nextents
217 struct xfs_btree_block *block, 80 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
218 int numrecs, 81 * (a signed 16-bit number, xfs_aextnum_t).
219 int *count); 82 *
83 * Note that we can no longer assume that if we are in ATTR1 that
84 * the fork offset of all the inodes will be
85 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
86 * with ATTR2 and then mounted back with ATTR1, keeping the
87 * di_forkoff's fixed but probably at various positions. Therefore,
88 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
89 * of a minimum size available.
90 */
91 if (whichfork == XFS_DATA_FORK) {
92 maxleafents = MAXEXTNUM;
93 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
94 } else {
95 maxleafents = MAXAEXTNUM;
96 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
97 }
98 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
99 minleafrecs = mp->m_bmap_dmnr[0];
100 minnoderecs = mp->m_bmap_dmnr[1];
101 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
102 for (level = 1; maxblocks > 1; level++) {
103 if (maxblocks <= maxrootrecs)
104 maxblocks = 1;
105 else
106 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
107 }
108 mp->m_bm_maxlevels[whichfork] = level;
109}
220 110
221/* 111/*
222 * Bmap internal routines. 112 * Convert the given file system block to a disk block. We have to treat it
113 * differently based on whether the file is a real time file or not, because the
114 * bmap code does.
223 */ 115 */
116xfs_daddr_t
117xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
118{
119 return (XFS_IS_REALTIME_INODE(ip) ? \
120 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
121 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
122}
224 123
225STATIC int /* error */ 124STATIC int /* error */
226xfs_bmbt_lookup_eq( 125xfs_bmbt_lookup_eq(
@@ -290,6 +189,1070 @@ xfs_bmbt_update(
290} 189}
291 190
292/* 191/*
/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	/*
	 * Leaf fanout (m_bmap_dmxr[0]) for level 0; switched to the node
	 * fanout (m_bmap_dmxr[1]) after the first iteration below.
	 */
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		/* len = ceil(len / maxrecs): blocks needed at this level */
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			/*
			 * A single block here implies one block at each
			 * remaining level up to the root; add them all.
			 */
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}
221
222/*
223 * Calculate the default attribute fork offset for newly created inodes.
224 */
225uint
226xfs_default_attroffset(
227 struct xfs_inode *ip)
228{
229 struct xfs_mount *mp = ip->i_mount;
230 uint offset;
231
232 if (mp->m_sb.sb_inodesize == 256) {
233 offset = XFS_LITINO(mp, ip->i_d.di_version) -
234 XFS_BMDR_SPACE_CALC(MINABTPTRS);
235 } else {
236 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
237 }
238
239 ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
240 return offset;
241}
242
243/*
244 * Helper routine to reset inode di_forkoff field when switching
245 * attribute fork from local to extent format - we reset it where
246 * possible to make space available for inline data fork extents.
247 */
248STATIC void
249xfs_bmap_forkoff_reset(
250 xfs_mount_t *mp,
251 xfs_inode_t *ip,
252 int whichfork)
253{
254 if (whichfork == XFS_ATTR_FORK &&
255 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
256 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
257 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
258 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
259
260 if (dfl_forkoff > ip->i_d.di_forkoff)
261 ip->i_d.di_forkoff = dfl_forkoff;
262 }
263}
264
265/*
266 * Extent tree block counting routines.
267 */
268
269/*
270 * Count leaf blocks given a range of extent records.
271 */
272STATIC void
273xfs_bmap_count_leaves(
274 xfs_ifork_t *ifp,
275 xfs_extnum_t idx,
276 int numrecs,
277 int *count)
278{
279 int b;
280
281 for (b = 0; b < numrecs; b++) {
282 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
283 *count += xfs_bmbt_get_blockcount(frp);
284 }
285}
286
287/*
288 * Count leaf blocks given a range of extent records originally
289 * in btree format.
290 */
291STATIC void
292xfs_bmap_disk_count_leaves(
293 struct xfs_mount *mp,
294 struct xfs_btree_block *block,
295 int numrecs,
296 int *count)
297{
298 int b;
299 xfs_bmbt_rec_t *frp;
300
301 for (b = 1; b <= numrecs; b++) {
302 frp = XFS_BMBT_REC_ADDR(mp, block, b);
303 *count += xfs_bmbt_disk_get_blockcount(frp);
304 }
305}
306
307/*
308 * Recursively walks each level of a btree
309 * to count total fsblocks is use.
310 */
311STATIC int /* error */
312xfs_bmap_count_tree(
313 xfs_mount_t *mp, /* file system mount point */
314 xfs_trans_t *tp, /* transaction pointer */
315 xfs_ifork_t *ifp, /* inode fork pointer */
316 xfs_fsblock_t blockno, /* file system block number */
317 int levelin, /* level in btree */
318 int *count) /* Count of blocks */
319{
320 int error;
321 xfs_buf_t *bp, *nbp;
322 int level = levelin;
323 __be64 *pp;
324 xfs_fsblock_t bno = blockno;
325 xfs_fsblock_t nextbno;
326 struct xfs_btree_block *block, *nextblock;
327 int numrecs;
328
329 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
330 &xfs_bmbt_buf_ops);
331 if (error)
332 return error;
333 *count += 1;
334 block = XFS_BUF_TO_BLOCK(bp);
335
336 if (--level) {
337 /* Not at node above leaves, count this level of nodes */
338 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
339 while (nextbno != NULLFSBLOCK) {
340 error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
341 XFS_BMAP_BTREE_REF,
342 &xfs_bmbt_buf_ops);
343 if (error)
344 return error;
345 *count += 1;
346 nextblock = XFS_BUF_TO_BLOCK(nbp);
347 nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
348 xfs_trans_brelse(tp, nbp);
349 }
350
351 /* Dive to the next level */
352 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
353 bno = be64_to_cpu(*pp);
354 if (unlikely((error =
355 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
356 xfs_trans_brelse(tp, bp);
357 XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
358 XFS_ERRLEVEL_LOW, mp);
359 return XFS_ERROR(EFSCORRUPTED);
360 }
361 xfs_trans_brelse(tp, bp);
362 } else {
363 /* count all level 1 nodes and their leaves */
364 for (;;) {
365 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
366 numrecs = be16_to_cpu(block->bb_numrecs);
367 xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
368 xfs_trans_brelse(tp, bp);
369 if (nextbno == NULLFSBLOCK)
370 break;
371 bno = nextbno;
372 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
373 XFS_BMAP_BTREE_REF,
374 &xfs_bmbt_buf_ops);
375 if (error)
376 return error;
377 *count += 1;
378 block = XFS_BUF_TO_BLOCK(bp);
379 }
380 }
381 return 0;
382}
383
384/*
385 * Count fsblocks of the given fork.
386 */
387int /* error */
388xfs_bmap_count_blocks(
389 xfs_trans_t *tp, /* transaction pointer */
390 xfs_inode_t *ip, /* incore inode */
391 int whichfork, /* data or attr fork */
392 int *count) /* out: count of blocks */
393{
394 struct xfs_btree_block *block; /* current btree block */
395 xfs_fsblock_t bno; /* block # of "block" */
396 xfs_ifork_t *ifp; /* fork structure */
397 int level; /* btree level, for checking */
398 xfs_mount_t *mp; /* file system mount structure */
399 __be64 *pp; /* pointer to block address */
400
401 bno = NULLFSBLOCK;
402 mp = ip->i_mount;
403 ifp = XFS_IFORK_PTR(ip, whichfork);
404 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
405 xfs_bmap_count_leaves(ifp, 0,
406 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
407 count);
408 return 0;
409 }
410
411 /*
412 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
413 */
414 block = ifp->if_broot;
415 level = be16_to_cpu(block->bb_level);
416 ASSERT(level > 0);
417 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
418 bno = be64_to_cpu(*pp);
419 ASSERT(bno != NULLDFSBNO);
420 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
421 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
422
423 if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
424 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
425 mp);
426 return XFS_ERROR(EFSCORRUPTED);
427 }
428
429 return 0;
430}
431
432/*
433 * Debug/sanity checking code
434 */
435
436STATIC int
437xfs_bmap_sanity_check(
438 struct xfs_mount *mp,
439 struct xfs_buf *bp,
440 int level)
441{
442 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
443
444 if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
445 block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
446 return 0;
447
448 if (be16_to_cpu(block->bb_level) != level ||
449 be16_to_cpu(block->bb_numrecs) == 0 ||
450 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
451 return 0;
452
453 return 1;
454}
455
#ifdef DEBUG
/*
 * Find the in-memory buffer for disk address "bno" if the cursor (or the
 * transaction it belongs to) already holds it; returns NULL otherwise.
 * Used by the leaf-extent checker to avoid re-reading held buffers.
 */
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item_desc *lidp;
	int			i;

	if (!cur)
		return NULL;

	/* First look through the buffers pinned by the cursor itself. */
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
		struct xfs_buf_log_item	*bip;
		bip = (struct xfs_buf_log_item *)lidp->lid_item;
		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}
486
/*
 * DEBUG check of one interior bmap btree block: keys must strictly
 * increase, and no two child pointers may be equal.  Panics on failure.
 * "root" selects the in-inode root layout (sz = broot bytes) vs. an
 * ordinary node block.
 */
STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		/* keys must be in strictly increasing startoff order */
		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}
534
/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves.
 */

STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0; /* 1 => we read bp, must release */

	/* Only btree-format forks have leaves to check. */
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			/* not held by cursor/transaction; read it ourselves */
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			/* cross-block ordering: prev block's last extent
			 * must end at or before this block's first */
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}
	if (bp_release) {
		bp_release = 0;
		xfs_trans_brelse(NULL, bp);
	}
	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}
701
702/*
703 * Add bmap trace insert entries for all the contents of the extent records.
704 */
705void
706xfs_bmap_trace_exlist(
707 xfs_inode_t *ip, /* incore inode pointer */
708 xfs_extnum_t cnt, /* count of entries in the list */
709 int whichfork, /* data or attr fork */
710 unsigned long caller_ip)
711{
712 xfs_extnum_t idx; /* extent record index */
713 xfs_ifork_t *ifp; /* inode fork pointer */
714 int state = 0;
715
716 if (whichfork == XFS_ATTR_FORK)
717 state |= BMAP_ATTRFORK;
718
719 ifp = XFS_IFORK_PTR(ip, whichfork);
720 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
721 for (idx = 0; idx < cnt; idx++)
722 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
723}
724
/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the callers original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extent beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;		/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			/* without ENTIRE, mappings must lie inside [bno, bno+len) */
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			/* with ENTIRE, mappings need only overlap the range */
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		/* consecutive mappings must be contiguous in file offset */
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}
765
766#else
767#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
768#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
769#endif /* DEBUG */
770
/*
 * bmap free list manipulation functions
 */

/*
 * Add the extent to the list of extents to be free at transaction end.
 * The list is maintained sorted (by block number).
 */
void
xfs_bmap_add_free(
	xfs_fsblock_t		bno,		/* fs block number of extent */
	xfs_filblks_t		len,		/* length of extent */
	xfs_bmap_free_t		*flist,		/* list of extents */
	xfs_mount_t		*mp)		/* mount point structure */
{
	xfs_bmap_free_item_t	*cur;		/* current (next) element */
	xfs_bmap_free_item_t	*new;		/* new element */
	xfs_bmap_free_item_t	*prev;		/* previous element */
#ifdef DEBUG
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	/* the extent must be real and fit entirely inside one AG */
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);
	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xbfi_startblock = bno;
	new->xbfi_blockcount = (xfs_extlen_t)len;
	/* find the first entry at or beyond bno, to keep the list sorted */
	for (prev = NULL, cur = flist->xbf_first;
	     cur != NULL;
	     prev = cur, cur = cur->xbfi_next) {
		if (cur->xbfi_startblock >= bno)
			break;
	}
	/* splice the new entry in before cur (possibly at the head) */
	if (prev)
		prev->xbfi_next = new;
	else
		flist->xbf_first = new;
	new->xbfi_next = cur;
	flist->xbf_count++;
}
821
822/*
823 * Remove the entry "free" from the free item list. Prev points to the
824 * previous entry, unless "free" is the head of the list.
825 */
826STATIC void
827xfs_bmap_del_free(
828 xfs_bmap_free_t *flist, /* free item list header */
829 xfs_bmap_free_item_t *prev, /* previous item on list, if any */
830 xfs_bmap_free_item_t *free) /* list item to be freed */
831{
832 if (prev)
833 prev->xbfi_next = free->xbfi_next;
834 else
835 flist->xbf_first = free->xbfi_next;
836 flist->xbf_count--;
837 kmem_zone_free(xfs_bmap_free_item_zone, free);
838}
839
840
/*
 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
 * caller.  Frees all the extents that need freeing, which must be done
 * last due to locking considerations.  We never free any extents in
 * the first transaction.
 *
 * Return 1 if the given transaction was committed and a new one
 * started, and 0 otherwise in the committed parameter.
 */
int						/* error */
xfs_bmap_finish(
	xfs_trans_t		**tp,		/* transaction pointer addr */
	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
	int			*committed)	/* xact committed or not */
{
	xfs_efd_log_item_t	*efd;		/* extent free data */
	xfs_efi_log_item_t	*efi;		/* extent free intention */
	int			error;		/* error return value */
	xfs_bmap_free_item_t	*free;		/* free extent item */
	unsigned int		logres;		/* new log reservation */
	unsigned int		logcount;	/* new log count */
	xfs_mount_t		*mp;		/* filesystem mount structure */
	xfs_bmap_free_item_t	*next;		/* next item on free list */
	xfs_trans_t		*ntp;		/* new transaction pointer */

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
	/* nothing to free: the caller's transaction is untouched */
	if (flist->xbf_count == 0) {
		*committed = 0;
		return 0;
	}
	ntp = *tp;
	/* log an EFI covering every extent on the list in this transaction */
	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
	for (free = flist->xbf_first; free; free = free->xbfi_next)
		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
			free->xbfi_blockcount);
	/* capture the reservation before the dup/commit replaces *tp */
	logres = ntp->t_log_res;
	logcount = ntp->t_log_count;
	ntp = xfs_trans_dup(*tp);
	error = xfs_trans_commit(*tp, 0);
	*tp = ntp;
	*committed = 1;
	/*
	 * We have a new transaction, so we should return committed=1,
	 * even though we're returning an error.
	 */
	if (error)
		return error;

	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(ntp->t_ticket);

	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
			logcount)))
		return error;
	/* the EFD in the new transaction pairs with the EFI committed above */
	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
	for (free = flist->xbf_first; free != NULL; free = next) {
		next = free->xbfi_next;
		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
				free->xbfi_blockcount))) {
			/*
			 * The bmap free list will be cleaned up at a
			 * higher level.  The EFI will be canceled when
			 * this transaction is aborted.
			 * Need to force shutdown here to make sure it
			 * happens, since this transaction may not be
			 * dirty yet.
			 */
			mp = ntp->t_mountp;
			if (!XFS_FORCED_SHUTDOWN(mp))
				xfs_force_shutdown(mp,
						   (error == EFSCORRUPTED) ?
						   SHUTDOWN_CORRUPT_INCORE :
						   SHUTDOWN_META_IO_ERROR);
			return error;
		}
		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
			free->xbfi_blockcount);
		xfs_bmap_del_free(flist, NULL, free);
	}
	return 0;
}
925
926/*
927 * Free up any items left in the list.
928 */
929void
930xfs_bmap_cancel(
931 xfs_bmap_free_t *flist) /* list of bmap_free_items */
932{
933 xfs_bmap_free_item_t *free; /* free list item */
934 xfs_bmap_free_item_t *next;
935
936 if (flist->xbf_count == 0)
937 return;
938 ASSERT(flist->xbf_first != NULL);
939 for (free = flist->xbf_first; free; free = next) {
940 next = free->xbfi_next;
941 xfs_bmap_del_free(flist, NULL, free);
942 }
943 ASSERT(flist->xbf_count == 0);
944}
945
 946/*
 947 * Inode fork format manipulation functions
 948 */
 949
 950/*
 951 * Transform a btree format file with only one leaf node, where the
 952 * extents list will fit in the inode, into an extents format file.
 953 * Since the file extents are already in-core, all we have to do is
 954 * give up the space for the btree root and pitch the leaf block.
 *
 * On success *logflagsp is set to the inode log flags the caller must
 * pass to xfs_trans_log_inode() (core plus the affected fork's extents).
 955 */
 956STATIC int /* error */
 957xfs_bmap_btree_to_extents(
 958 xfs_trans_t *tp, /* transaction pointer */
 959 xfs_inode_t *ip, /* incore inode pointer */
 960 xfs_btree_cur_t *cur, /* btree cursor */
 961 int *logflagsp, /* inode logging flags */
 962 int whichfork) /* data or attr fork */
 963{
 964 /* REFERENCED */
 965 struct xfs_btree_block *cblock;/* child btree block */
 966 xfs_fsblock_t cbno; /* child block number */
 967 xfs_buf_t *cbp; /* child block's buffer */
 968 int error; /* error return value */
 969 xfs_ifork_t *ifp; /* inode fork data */
 970 xfs_mount_t *mp; /* mount point structure */
 971 __be64 *pp; /* ptr to block address */
 972 struct xfs_btree_block *rblock;/* root btree block */
 973
 974 mp = ip->i_mount;
 975 ifp = XFS_IFORK_PTR(ip, whichfork);
 976 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 977 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 978 rblock = ifp->if_broot;
 /* A one-level tree with a single root record is the only legal input. */
 979 ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 980 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 981 ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 /* Fetch the root's only child pointer: the leaf block we will discard. */
 982 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 983 cbno = be64_to_cpu(*pp);
 984 *logflagsp = 0;
 985#ifdef DEBUG
 986 if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
 987 return error;
 988#endif
 989 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
 990 &xfs_bmbt_buf_ops);
 991 if (error)
 992 return error;
 993 cblock = XFS_BUF_TO_BLOCK(cbp);
 994 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 995 return error;
 /*
 * Queue the leaf block for freeing, drop the inode block count and
 * quota accounting, and invalidate the buffer so it is never written.
 */
 996 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
 997 ip->i_d.di_nblocks--;
 998 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 999 xfs_trans_binval(tp, cbp);
 /* Don't leave the cursor holding a stale reference to the dead buffer. */
 1000 if (cur->bc_bufs[0] == cbp)
 1001 cur->bc_bufs[0] = NULL;
 /* Give back the incore root; the fork now holds plain extent records. */
 1002 xfs_iroot_realloc(ip, -1, whichfork);
 1003 ASSERT(ifp->if_broot == NULL);
 1004 ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 1005 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
 1006 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 1007 return 0;
 1008}
1009
 1010/*
 1011 * Convert an extents-format file into a btree-format file.
 1012 * The new file will have a root block (in the inode) and a single child block.
 1013 */
 1014STATIC int /* error */
 1015xfs_bmap_extents_to_btree(
 1016 xfs_trans_t *tp, /* transaction pointer */
 1017 xfs_inode_t *ip, /* incore inode pointer */
 1018 xfs_fsblock_t *firstblock, /* first-block-allocated */
 1019 xfs_bmap_free_t *flist, /* blocks freed in xaction */
 1020 xfs_btree_cur_t **curp, /* cursor returned to caller */
 1021 int wasdel, /* converting a delayed alloc */
 1022 int *logflagsp, /* inode logging flags */
 1023 int whichfork) /* data or attr fork */
 1024{
 1025 struct xfs_btree_block *ablock; /* allocated (child) bt block */
 1026 xfs_buf_t *abp; /* buffer for ablock */
 1027 xfs_alloc_arg_t args; /* allocation arguments */
 1028 xfs_bmbt_rec_t *arp; /* child record pointer */
 1029 struct xfs_btree_block *block; /* btree root block */
 1030 xfs_btree_cur_t *cur; /* bmap btree cursor */
 1031 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
 1032 int error; /* error return value */
 1033 xfs_extnum_t i, cnt; /* extent record index */
 1034 xfs_ifork_t *ifp; /* inode fork pointer */
 1035 xfs_bmbt_key_t *kp; /* root block key pointer */
 1036 xfs_mount_t *mp; /* mount structure */
 1037 xfs_extnum_t nextents; /* number of file extents */
 1038 xfs_bmbt_ptr_t *pp; /* root block address pointer */
 1039
 1040 mp = ip->i_mount;
 1041 ifp = XFS_IFORK_PTR(ip, whichfork);
 1042 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
 1043
 1044 /*
 1045 * Make space in the inode incore.
 1046 */
 1047 xfs_iroot_realloc(ip, 1, whichfork);
 1048 ifp->if_flags |= XFS_IFBROOT;
 1049
 1050 /*
 1051 * Fill in the root.
 1052 */
 1053 block = ifp->if_broot;
 /* CRC-enabled filesystems use the v5 magic and CRC btree flags. */
 1054 if (xfs_sb_version_hascrc(&mp->m_sb))
 1055 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 1056 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
 1057 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
 1058 else
 1059 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 1060 XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
 1061 XFS_BTREE_LONG_PTRS);
 1062
 1063 /*
 1064 * Need a cursor. Can't allocate until bb_level is filled in.
 1065 */
 1066 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 1067 cur->bc_private.b.firstblock = *firstblock;
 1068 cur->bc_private.b.flist = flist;
 1069 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 1070 /*
 1071 * Convert to a btree with two levels, one record in root.
 1072 */
 1073 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
 1074 memset(&args, 0, sizeof(args));
 1075 args.tp = tp;
 1076 args.mp = mp;
 1077 args.firstblock = *firstblock;
 /*
 * Pick an allocation target: near the inode if nothing has been
 * allocated in this transaction yet, otherwise at/near *firstblock
 * (START_BNO once we have dropped into low-space allocation mode).
 */
 1078 if (*firstblock == NULLFSBLOCK) {
 1079 args.type = XFS_ALLOCTYPE_START_BNO;
 1080 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 1081 } else if (flist->xbf_low) {
 1082 args.type = XFS_ALLOCTYPE_START_BNO;
 1083 args.fsbno = *firstblock;
 1084 } else {
 1085 args.type = XFS_ALLOCTYPE_NEAR_BNO;
 1086 args.fsbno = *firstblock;
 1087 }
 1088 args.minlen = args.maxlen = args.prod = 1;
 1089 args.wasdel = wasdel;
 1090 *logflagsp = 0;
 1091 if ((error = xfs_alloc_vextent(&args))) {
 /* Undo the incore root and tear down the cursor on failure. */
 1092 xfs_iroot_realloc(ip, -1, whichfork);
 1093 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 1094 return error;
 1095 }
 1096 /*
 1097 * Allocation can't fail, the space was reserved.
 1098 */
 1099 ASSERT(args.fsbno != NULLFSBLOCK);
 1100 ASSERT(*firstblock == NULLFSBLOCK ||
 1101 args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
 1102 (flist->xbf_low &&
 1103 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
 1104 *firstblock = cur->bc_private.b.firstblock = args.fsbno;
 1105 cur->bc_private.b.allocated++;
 1106 ip->i_d.di_nblocks++;
 1107 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 1108 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
 1109 /*
 1110 * Fill in the child block.
 1111 */
 1112 abp->b_ops = &xfs_bmbt_buf_ops;
 1113 ablock = XFS_BUF_TO_BLOCK(abp);
 1114 if (xfs_sb_version_hascrc(&mp->m_sb))
 1115 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 1116 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
 1117 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
 1118 else
 1119 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 1120 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
 1121 XFS_BTREE_LONG_PTRS);
 1122
 /*
 * Copy the real (non-delalloc) incore extent records into the new
 * leaf; delayed allocations have no on-disk representation.
 */
 1123 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 1124 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 1125 for (cnt = i = 0; i < nextents; i++) {
 1126 ep = xfs_iext_get_ext(ifp, i);
 1127 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
 1128 arp->l0 = cpu_to_be64(ep->l0);
 1129 arp->l1 = cpu_to_be64(ep->l1);
 1130 arp++; cnt++;
 1131 }
 1132 }
 1133 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
 1134 xfs_btree_set_numrecs(ablock, cnt);
 1135
 1136 /*
 1137 * Fill in the root key and pointer.
 1138 */
 1139 kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 1140 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 1141 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 1142 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 1143 be16_to_cpu(block->bb_level)));
 1144 *pp = cpu_to_be64(args.fsbno);
 1145
 1146 /*
 1147 * Do all this logging at the end so that
 1148 * the root is at the right level.
 1149 */
 1150 xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 1151 xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 1152 ASSERT(*curp == NULL);
 /* Hand the cursor back to the caller for further btree operations. */
 1153 *curp = cur;
 1154 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 1155 return 0;
 1156}
1157
 1158/*
 1159 * Convert a local file to an extents file.
 1160 * This code is out of bounds for data forks of regular files,
 1161 * since the file data needs to get logged so things will stay consistent.
 1162 * (The bmap-level manipulations are ok, though).
 1163 */
 1164STATIC int /* error */
 1165xfs_bmap_local_to_extents(
 1166 xfs_trans_t *tp, /* transaction pointer */
 1167 xfs_inode_t *ip, /* incore inode pointer */
 1168 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
 1169 xfs_extlen_t total, /* total blocks needed by transaction */
 1170 int *logflagsp, /* inode logging flags */
 1171 int whichfork,
 /* callback that formats the new block from the inline fork data */
 1172 void (*init_fn)(struct xfs_trans *tp,
 1173 struct xfs_buf *bp,
 1174 struct xfs_inode *ip,
 1175 struct xfs_ifork *ifp))
 1176{
 1177 int error; /* error return value */
 1178 int flags; /* logging flags returned */
 1179 xfs_ifork_t *ifp; /* inode fork pointer */
 1180
 1181 /*
 1182 * We don't want to deal with the case of keeping inode data inline yet.
 1183 * So sending the data fork of a regular inode is invalid.
 1184 */
 1185 ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
 1186 ifp = XFS_IFORK_PTR(ip, whichfork);
 1187 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
 1188 flags = 0;
 1189 error = 0;
 /*
 * An empty local fork needs no block; only allocate and copy when
 * there is actual inline data to move out of the inode.
 */
 1190 if (ifp->if_bytes) {
 1191 xfs_alloc_arg_t args; /* allocation arguments */
 1192 xfs_buf_t *bp; /* buffer for extent block */
 1193 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
 1194
 1195 ASSERT((ifp->if_flags &
 1196 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
 1197 memset(&args, 0, sizeof(args));
 1198 args.tp = tp;
 1199 args.mp = ip->i_mount;
 1200 args.firstblock = *firstblock;
 1201 /*
 1202 * Allocate a block. We know we need only one, since the
 1203 * file currently fits in an inode.
 1204 */
 1205 if (*firstblock == NULLFSBLOCK) {
 1206 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 1207 args.type = XFS_ALLOCTYPE_START_BNO;
 1208 } else {
 1209 args.fsbno = *firstblock;
 1210 args.type = XFS_ALLOCTYPE_NEAR_BNO;
 1211 }
 1212 args.total = total;
 1213 args.minlen = args.maxlen = args.prod = 1;
 1214 error = xfs_alloc_vextent(&args);
 1215 if (error)
 1216 goto done;
 1217
 1218 /* Can't fail, the space was reserved. */
 1219 ASSERT(args.fsbno != NULLFSBLOCK);
 1220 ASSERT(args.len == 1);
 1221 *firstblock = args.fsbno;
 1222 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
 1223
 1224 /* initialise the block and copy the data */
 1225 init_fn(tp, bp, ip, ifp);
 1226
 1227 /* account for the change in fork size and log everything */
 1228 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
 1229 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
 1230 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 /* Insert the single extent mapping file offset 0 to the new block. */
 1231 xfs_iext_add(ifp, 0, 1);
 1232 ep = xfs_iext_get_ext(ifp, 0);
 1233 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
 1234 trace_xfs_bmap_post_update(ip, 0,
 1235 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
 1236 _THIS_IP_);
 1237 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 1238 ip->i_d.di_nblocks = 1;
 1239 xfs_trans_mod_dquot_byino(tp, ip,
 1240 XFS_TRANS_DQ_BCOUNT, 1L);
 1241 flags |= xfs_ilog_fext(whichfork);
 1242 } else {
 1243 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
 1244 xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
 1245 }
 /* Switch the fork over to extents format and log the inode core. */
 1246 ifp->if_flags &= ~XFS_IFINLINE;
 1247 ifp->if_flags |= XFS_IFEXTENTS;
 1248 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
 1249 flags |= XFS_ILOG_CORE;
 1250done:
 1251 *logflagsp = flags;
 1252 return error;
 1253}
1254
1255/*
293 * Called from xfs_bmap_add_attrfork to handle btree format files. 1256 * Called from xfs_bmap_add_attrfork to handle btree format files.
294 */ 1257 */
295STATIC int /* error */ 1258STATIC int /* error */
@@ -360,29 +1323,22 @@ xfs_bmap_add_attrfork_extents(
360} 1323}
361 1324
362/* 1325/*
363 * Block initialisation functions for local to extent format conversion. 1326 * Block initialisation function for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files, 1327 *
365 * but for now they are too simple to worry about. 1328 * This shouldn't actually be called by anyone, so make sure debug kernels cause
1329 * a noticable failure.
366 */ 1330 */
367STATIC void 1331STATIC void
368xfs_bmap_local_to_extents_init_fn( 1332xfs_bmap_local_to_extents_init_fn(
1333 struct xfs_trans *tp,
369 struct xfs_buf *bp, 1334 struct xfs_buf *bp,
370 struct xfs_inode *ip, 1335 struct xfs_inode *ip,
371 struct xfs_ifork *ifp) 1336 struct xfs_ifork *ifp)
372{ 1337{
1338 ASSERT(0);
373 bp->b_ops = &xfs_bmbt_buf_ops; 1339 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 1340 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375} 1341 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386} 1342}
387 1343
388/* 1344/*
@@ -394,8 +1350,7 @@ xfs_symlink_local_to_remote(
394 * 1350 *
395 * XXX (dgc): investigate whether directory conversion can use the generic 1351 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex 1352 * formatting callout. It should be possible - it's just a very complex
397 * formatter. it would also require passing the transaction through to the init 1353 * formatter.
398 * function.
399 */ 1354 */
400STATIC int /* error */ 1355STATIC int /* error */
401xfs_bmap_add_attrfork_local( 1356xfs_bmap_add_attrfork_local(
@@ -432,6 +1387,640 @@ xfs_bmap_add_attrfork_local(
432} 1387}
433 1388
434/* 1389/*
 1390 * Convert inode from non-attributed to attributed.
 1391 * Must not be in a transaction, ip must not be locked.
 1392 */
 1393int /* error code */
 1394xfs_bmap_add_attrfork(
 1395 xfs_inode_t *ip, /* incore inode pointer */
 1396 int size, /* space new attribute needs */
 1397 int rsvd) /* xact may use reserved blks */
 1398{
 1399 xfs_fsblock_t firstblock; /* 1st block/ag allocated */
 1400 xfs_bmap_free_t flist; /* freed extent records */
 1401 xfs_mount_t *mp; /* mount structure */
 1402 xfs_trans_t *tp; /* transaction pointer */
 1403 int blks; /* space reservation */
 1404 int version = 1; /* superblock attr version */
 1405 int committed; /* xaction was committed */
 1406 int logflags; /* logging flags */
 1407 int error; /* error return value */
 1408
 1409 ASSERT(XFS_IFORK_Q(ip) == 0);
 1410
 1411 mp = ip->i_mount;
 1412 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
 /* Allocate and reserve a permanent transaction for the conversion. */
 1413 tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
 1414 blks = XFS_ADDAFORK_SPACE_RES(mp);
 1415 if (rsvd)
 1416 tp->t_flags |= XFS_TRANS_RESERVE;
 1417 if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
 1418 XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
 1419 goto error0;
 1420 xfs_ilock(ip, XFS_ILOCK_EXCL);
 1421 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
 1422 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
 1423 XFS_QMOPT_RES_REGBLKS);
 1424 if (error) {
 1425 xfs_iunlock(ip, XFS_ILOCK_EXCL);
 1426 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
 1427 return error;
 1428 }
 /* Lost a race: someone else already added the attr fork. */
 1429 if (XFS_IFORK_Q(ip))
 1430 goto error1;
 1431 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
 1432 /*
 1433 * For inodes coming from pre-6.2 filesystems.
 1434 */
 1435 ASSERT(ip->i_d.di_aformat == 0);
 1436 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 1437 }
 1438 ASSERT(ip->i_d.di_anextents == 0);
 1439
 1440 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 1441 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 1442
 /* Pick the new attr fork offset based on the data fork format. */
 1443 switch (ip->i_d.di_format) {
 1444 case XFS_DINODE_FMT_DEV:
 1445 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
 1446 break;
 1447 case XFS_DINODE_FMT_UUID:
 1448 ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
 1449 break;
 1450 case XFS_DINODE_FMT_LOCAL:
 1451 case XFS_DINODE_FMT_EXTENTS:
 1452 case XFS_DINODE_FMT_BTREE:
 1453 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
 1454 if (!ip->i_d.di_forkoff)
 1455 ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
 1456 else if (mp->m_flags & XFS_MOUNT_ATTR2)
 1457 version = 2;
 1458 break;
 1459 default:
 1460 ASSERT(0);
 1461 error = XFS_ERROR(EINVAL);
 1462 goto error1;
 1463 }
 1464
 1465 ASSERT(ip->i_afp == NULL);
 1466 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
 1467 ip->i_afp->if_flags = XFS_IFEXTENTS;
 1468 logflags = 0;
 1469 xfs_bmap_init(&flist, &firstblock);
 /*
 * The data fork may need reshaping to make room for the new attr
 * fork; the conversion helper is chosen by the data fork format.
 */
 1470 switch (ip->i_d.di_format) {
 1471 case XFS_DINODE_FMT_LOCAL:
 1472 error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
 1473 &logflags);
 1474 break;
 1475 case XFS_DINODE_FMT_EXTENTS:
 1476 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
 1477 &flist, &logflags);
 1478 break;
 1479 case XFS_DINODE_FMT_BTREE:
 1480 error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
 1481 &logflags);
 1482 break;
 1483 default:
 1484 error = 0;
 1485 break;
 1486 }
 1487 if (logflags)
 1488 xfs_trans_log_inode(tp, ip, logflags);
 1489 if (error)
 1490 goto error2;
 /* Update superblock feature bits if this is the first attr use. */
 1491 if (!xfs_sb_version_hasattr(&mp->m_sb) ||
 1492 (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
 1493 __int64_t sbfields = 0;
 1494
 1495 spin_lock(&mp->m_sb_lock);
 1496 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
 1497 xfs_sb_version_addattr(&mp->m_sb);
 1498 sbfields |= XFS_SB_VERSIONNUM;
 1499 }
 1500 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
 1501 xfs_sb_version_addattr2(&mp->m_sb);
 1502 sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 1503 }
 1504 if (sbfields) {
 1505 spin_unlock(&mp->m_sb_lock);
 1506 xfs_mod_sb(tp, sbfields);
 1507 } else
 1508 spin_unlock(&mp->m_sb_lock);
 1509 }
 1510
 1511 error = xfs_bmap_finish(&tp, &flist, &committed);
 1512 if (error)
 1513 goto error2;
 1514 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 1515error2:
 1516 xfs_bmap_cancel(&flist);
 1517error1:
 1518 xfs_iunlock(ip, XFS_ILOCK_EXCL);
 1519error0:
 1520 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
 1521 return error;
 1522}
1523
 1524/*
 1525 * Internal and external extent tree search functions.
 1526 */
 1527
 1528/*
 1529 * Read in the extents to if_extents.
 1530 * All inode fields are set up by caller, we just traverse the btree
 1531 * and copy the records in. If the file system cannot contain unwritten
 1532 * extents, the records are checked for no "state" flags.
 1533 */
 1534int /* error */
 1535xfs_bmap_read_extents(
 1536 xfs_trans_t *tp, /* transaction pointer */
 1537 xfs_inode_t *ip, /* incore inode */
 1538 int whichfork) /* data or attr fork */
 1539{
 1540 struct xfs_btree_block *block; /* current btree block */
 1541 xfs_fsblock_t bno; /* block # of "block" */
 1542 xfs_buf_t *bp; /* buffer for "block" */
 1543 int error; /* error return value */
 1544 xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */
 1545 xfs_extnum_t i, j; /* index into the extents list */
 1546 xfs_ifork_t *ifp; /* fork structure */
 1547 int level; /* btree level, for checking */
 1548 xfs_mount_t *mp; /* file system mount structure */
 1549 __be64 *pp; /* pointer to block address */
 1550 /* REFERENCED */
 1551 xfs_extnum_t room; /* number of entries there's room for */
 1552
 1553 bno = NULLFSBLOCK;
 1554 mp = ip->i_mount;
 1555 ifp = XFS_IFORK_PTR(ip, whichfork);
 /* Attr forks and pre-unwritten-extent data forks carry no state bits. */
 1556 exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
 1557 XFS_EXTFMT_INODE(ip);
 1558 block = ifp->if_broot;
 1559 /*
 1560 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 1561 */
 1562 level = be16_to_cpu(block->bb_level);
 1563 ASSERT(level > 0);
 1564 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 1565 bno = be64_to_cpu(*pp);
 1566 ASSERT(bno != NULLDFSBNO);
 1567 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 1568 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 1569 /*
 1570 * Go down the tree until leaf level is reached, following the first
 1571 * pointer (leftmost) at each level.
 1572 */
 1573 while (level-- > 0) {
 1574 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 1575 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 1576 if (error)
 1577 return error;
 1578 block = XFS_BUF_TO_BLOCK(bp);
 1579 XFS_WANT_CORRUPTED_GOTO(
 1580 xfs_bmap_sanity_check(mp, bp, level),
 1581 error0);
 1582 if (level == 0)
 1583 break;
 1584 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 1585 bno = be64_to_cpu(*pp);
 1586 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
 /* Release interior blocks as we descend; only the leaf stays held. */
 1587 xfs_trans_brelse(tp, bp);
 1588 }
 1589 /*
 1590 * Here with bp and block set to the leftmost leaf node in the tree.
 1591 */
 1592 room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 1593 i = 0;
 1594 /*
 1595 * Loop over all leaf nodes. Copy information to the extent records.
 1596 */
 1597 for (;;) {
 1598 xfs_bmbt_rec_t *frp;
 1599 xfs_fsblock_t nextbno;
 1600 xfs_extnum_t num_recs;
 1601 xfs_extnum_t start;
 1602
 1603 num_recs = xfs_btree_get_numrecs(block);
 /* More records than the fork claims to hold => corrupt inode. */
 1604 if (unlikely(i + num_recs > room)) {
 1605 ASSERT(i + num_recs <= room);
 1606 xfs_warn(ip->i_mount,
 1607 "corrupt dinode %Lu, (btree extents).",
 1608 (unsigned long long) ip->i_ino);
 1609 XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
 1610 XFS_ERRLEVEL_LOW, ip->i_mount, block);
 1611 goto error0;
 1612 }
 1613 XFS_WANT_CORRUPTED_GOTO(
 1614 xfs_bmap_sanity_check(mp, bp, 0),
 1615 error0);
 1616 /*
 1617 * Read-ahead the next leaf block, if any.
 1618 */
 1619 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 1620 if (nextbno != NULLFSBLOCK)
 1621 xfs_btree_reada_bufl(mp, nextbno, 1,
 1622 &xfs_bmbt_buf_ops);
 1623 /*
 1624 * Copy records into the extent records.
 1625 */
 1626 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
 1627 start = i;
 1628 for (j = 0; j < num_recs; j++, i++, frp++) {
 1629 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
 1630 trp->l0 = be64_to_cpu(frp->l0);
 1631 trp->l1 = be64_to_cpu(frp->l1);
 1632 }
 1633 if (exntf == XFS_EXTFMT_NOSTATE) {
 1634 /*
 1635 * Check all attribute bmap btree records and
 1636 * any "older" data bmap btree records for a
 1637 * set bit in the "extent flag" position.
 1638 */
 1639 if (unlikely(xfs_check_nostate_extents(ifp,
 1640 start, num_recs))) {
 1641 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
 1642 XFS_ERRLEVEL_LOW,
 1643 ip->i_mount);
 1644 goto error0;
 1645 }
 1646 }
 1647 xfs_trans_brelse(tp, bp);
 1648 bno = nextbno;
 1649 /*
 1650 * If we've reached the end, stop.
 1651 */
 1652 if (bno == NULLFSBLOCK)
 1653 break;
 /* Advance to the next leaf via the rightsib pointer. */
 1654 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 1655 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 1656 if (error)
 1657 return error;
 1658 block = XFS_BUF_TO_BLOCK(bp);
 1659 }
 1660 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 1661 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
 1662 XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
 1663 return 0;
 1664error0:
 1665 xfs_trans_brelse(tp, bp);
 1666 return XFS_ERROR(EFSCORRUPTED);
 1667}
1668
1669
1670/*
1671 * Search the extent records for the entry containing block bno.
1672 * If bno lies in a hole, point to the next entry. If bno lies
1673 * past eof, *eofp will be set, and *prevp will contain the last
1674 * entry (null if none). Else, *lastxp will be set to the index
1675 * of the found entry; *gotp will contain the entry.
1676 */
1677STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
1678xfs_bmap_search_multi_extents(
1679 xfs_ifork_t *ifp, /* inode fork pointer */
1680 xfs_fileoff_t bno, /* block number searched for */
1681 int *eofp, /* out: end of file found */
1682 xfs_extnum_t *lastxp, /* out: last extent index */
1683 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
1684 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
1685{
1686 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1687 xfs_extnum_t lastx; /* last extent index */
1688
1689 /*
1690 * Initialize the extent entry structure to catch access to
1691 * uninitialized br_startblock field.
1692 */
1693 gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1694 gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1695 gotp->br_state = XFS_EXT_INVALID;
1696#if XFS_BIG_BLKNOS
1697 gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1698#else
1699 gotp->br_startblock = 0xffffa5a5;
1700#endif
1701 prevp->br_startoff = NULLFILEOFF;
1702
1703 ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1704 if (lastx > 0) {
1705 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1706 }
1707 if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1708 xfs_bmbt_get_all(ep, gotp);
1709 *eofp = 0;
1710 } else {
1711 if (lastx > 0) {
1712 *gotp = *prevp;
1713 }
1714 *eofp = 1;
1715 ep = NULL;
1716 }
1717 *lastxp = lastx;
1718 return ep;
1719}
1720
1721/*
1722 * Search the extents list for the inode, for the extent containing bno.
1723 * If bno lies in a hole, point to the next entry. If bno lies past eof,
1724 * *eofp will be set, and *prevp will contain the last entry (null if none).
1725 * Else, *lastxp will be set to the index of the found
1726 * entry; *gotp will contain the entry.
1727 */
1728STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
1729xfs_bmap_search_extents(
1730 xfs_inode_t *ip, /* incore inode pointer */
1731 xfs_fileoff_t bno, /* block number searched for */
1732 int fork, /* data or attr fork */
1733 int *eofp, /* out: end of file found */
1734 xfs_extnum_t *lastxp, /* out: last extent index */
1735 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
1736 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
1737{
1738 xfs_ifork_t *ifp; /* inode fork pointer */
1739 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1740
1741 XFS_STATS_INC(xs_look_exlist);
1742 ifp = XFS_IFORK_PTR(ip, fork);
1743
1744 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
1745
1746 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
1747 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
1748 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
1749 "Access to block zero in inode %llu "
1750 "start_block: %llx start_off: %llx "
1751 "blkcnt: %llx extent-state: %x lastx: %x\n",
1752 (unsigned long long)ip->i_ino,
1753 (unsigned long long)gotp->br_startblock,
1754 (unsigned long long)gotp->br_startoff,
1755 (unsigned long long)gotp->br_blockcount,
1756 gotp->br_state, *lastxp);
1757 *lastxp = NULLEXTNUM;
1758 *eofp = 1;
1759 return NULL;
1760 }
1761 return ep;
1762}
1763
1764/*
1765 * Returns the file-relative block number of the first unused block(s)
1766 * in the file with at least "len" logically contiguous blocks free.
1767 * This is the lowest-address hole if the file has holes, else the first block
1768 * past the end of file.
1769 * Return 0 if the file is currently local (in-inode).
1770 */
1771int /* error */
1772xfs_bmap_first_unused(
1773 xfs_trans_t *tp, /* transaction pointer */
1774 xfs_inode_t *ip, /* incore inode */
1775 xfs_extlen_t len, /* size of hole to find */
1776 xfs_fileoff_t *first_unused, /* unused block */
1777 int whichfork) /* data or attr fork */
1778{
1779 int error; /* error return value */
1780 int idx; /* extent record index */
1781 xfs_ifork_t *ifp; /* inode fork pointer */
1782 xfs_fileoff_t lastaddr; /* last block number seen */
1783 xfs_fileoff_t lowest; /* lowest useful block */
1784 xfs_fileoff_t max; /* starting useful block */
1785 xfs_fileoff_t off; /* offset for this block */
1786 xfs_extnum_t nextents; /* number of extent entries */
1787
1788 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1789 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1790 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1791 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1792 *first_unused = 0;
1793 return 0;
1794 }
1795 ifp = XFS_IFORK_PTR(ip, whichfork);
1796 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1797 (error = xfs_iread_extents(tp, ip, whichfork)))
1798 return error;
1799 lowest = *first_unused;
1800 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1801 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1802 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1803 off = xfs_bmbt_get_startoff(ep);
1804 /*
1805 * See if the hole before this extent will work.
1806 */
1807 if (off >= lowest + len && off - max >= len) {
1808 *first_unused = max;
1809 return 0;
1810 }
1811 lastaddr = off + xfs_bmbt_get_blockcount(ep);
1812 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1813 }
1814 *first_unused = max;
1815 return 0;
1816}
1817
 1818/*
 1819 * Returns the file-relative block number of the last block + 1 before
 1820 * last_block (input value) in the file.
 1821 * This is not based on i_size, it is based on the extent records.
 1822 * Returns 0 for local files, as they do not have extent records.
 1823 */
 1824int /* error */
 1825xfs_bmap_last_before(
 1826 xfs_trans_t *tp, /* transaction pointer */
 1827 xfs_inode_t *ip, /* incore inode */
 1828 xfs_fileoff_t *last_block, /* last block */
 1829 int whichfork) /* data or attr fork */
 1830{
 1831 xfs_fileoff_t bno; /* input file offset */
 1832 int eof; /* hit end of file */
 1833 xfs_bmbt_rec_host_t *ep; /* pointer to last extent */
 1834 int error; /* error return value */
 1835 xfs_bmbt_irec_t got; /* current extent value */
 1836 xfs_ifork_t *ifp; /* inode fork pointer */
 1837 xfs_extnum_t lastx; /* last extent used */
 1838 xfs_bmbt_irec_t prev; /* previous extent value */
 1839
 1840 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 1841 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 1842 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
 1843 return XFS_ERROR(EIO);
 1844 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 1845 *last_block = 0;
 1846 return 0;
 1847 }
 1848 ifp = XFS_IFORK_PTR(ip, whichfork);
 1849 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 1850 (error = xfs_iread_extents(tp, ip, whichfork)))
 1851 return error;
 1852 bno = *last_block - 1;
 1853 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
 1854 &prev);
 /*
 * If bno falls in a hole or past EOF, back *last_block up to the end
 * of the previous extent (zero if there is none).  Note: ep may be
 * NULL when eof is set; short-circuit evaluation protects the
 * xfs_bmbt_get_startoff() call below.
 */
 1855 if (eof || xfs_bmbt_get_startoff(ep) > bno) {
 1856 if (prev.br_startoff == NULLFILEOFF)
 1857 *last_block = 0;
 1858 else
 1859 *last_block = prev.br_startoff + prev.br_blockcount;
 1860 }
 1861 /*
 1862 * Otherwise *last_block is already the right answer.
 1863 */
 1864 return 0;
 1865}
1866
1867STATIC int
1868xfs_bmap_last_extent(
1869 struct xfs_trans *tp,
1870 struct xfs_inode *ip,
1871 int whichfork,
1872 struct xfs_bmbt_irec *rec,
1873 int *is_empty)
1874{
1875 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1876 int error;
1877 int nextents;
1878
1879 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1880 error = xfs_iread_extents(tp, ip, whichfork);
1881 if (error)
1882 return error;
1883 }
1884
1885 nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1886 if (nextents == 0) {
1887 *is_empty = 1;
1888 return 0;
1889 }
1890
1891 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1892 *is_empty = 0;
1893 return 0;
1894}
1895
1896/*
1897 * Check the last inode extent to determine whether this allocation will result
1898 * in blocks being allocated at the end of the file. When we allocate new data
1899 * blocks at the end of the file which do not start at the previous data block,
1900 * we will try to align the new blocks at stripe unit boundaries.
1901 *
1902 * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
1903 * at, or past the EOF.
1904 */
1905STATIC int
1906xfs_bmap_isaeof(
1907 struct xfs_bmalloca *bma,
1908 int whichfork)
1909{
1910 struct xfs_bmbt_irec rec;
1911 int is_empty;
1912 int error;
1913
1914 bma->aeof = 0;
1915 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1916 &is_empty);
1917 if (error || is_empty)
1918 return error;
1919
1920 /*
1921 * Check if we are allocation or past the last extent, or at least into
1922 * the last delayed allocated extent.
1923 */
1924 bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1925 (bma->offset >= rec.br_startoff &&
1926 isnullstartblock(rec.br_startblock));
1927 return 0;
1928}
1929
1930/*
1931 * Check if the endoff is outside the last extent. If so the caller will grow
1932 * the allocation to a stripe unit boundary. All offsets are considered outside
1933 * the end of file for an empty fork, so 1 is returned in *eof in that case.
1934 */
1935int
1936xfs_bmap_eof(
1937 struct xfs_inode *ip,
1938 xfs_fileoff_t endoff,
1939 int whichfork,
1940 int *eof)
1941{
1942 struct xfs_bmbt_irec rec;
1943 int error;
1944
1945 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
1946 if (error || *eof)
1947 return error;
1948
1949 *eof = endoff >= rec.br_startoff + rec.br_blockcount;
1950 return 0;
1951}
1952
1953/*
1954 * Returns the file-relative block number of the first block past eof in
1955 * the file. This is not based on i_size, it is based on the extent records.
1956 * Returns 0 for local files, as they do not have extent records.
1957 */
1958int
1959xfs_bmap_last_offset(
1960 struct xfs_trans *tp,
1961 struct xfs_inode *ip,
1962 xfs_fileoff_t *last_block,
1963 int whichfork)
1964{
1965 struct xfs_bmbt_irec rec;
1966 int is_empty;
1967 int error;
1968
1969 *last_block = 0;
1970
1971 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1972 return 0;
1973
1974 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1975 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1976 return XFS_ERROR(EIO);
1977
1978 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1979 if (error || is_empty)
1980 return error;
1981
1982 *last_block = rec.br_startoff + rec.br_blockcount;
1983 return 0;
1984}
1985
1986/*
1987 * Returns whether the selected fork of the inode has exactly one
1988 * block or not. For the data fork we check this matches di_size,
1989 * implying the file's range is 0..bsize-1.
1990 */
1991int /* 1=>1 block, 0=>otherwise */
1992xfs_bmap_one_block(
1993 xfs_inode_t *ip, /* incore inode */
1994 int whichfork) /* data or attr fork */
1995{
1996 xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */
1997 xfs_ifork_t *ifp; /* inode fork pointer */
1998 int rval; /* return value */
1999 xfs_bmbt_irec_t s; /* internal version of extent */
2000
2001#ifndef DEBUG
2002 if (whichfork == XFS_DATA_FORK)
2003 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
2004#endif /* !DEBUG */
2005 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
2006 return 0;
2007 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
2008 return 0;
2009 ifp = XFS_IFORK_PTR(ip, whichfork);
2010 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
2011 ep = xfs_iext_get_ext(ifp, 0);
2012 xfs_bmbt_get_all(ep, &s);
2013 rval = s.br_startoff == 0 && s.br_blockcount == 1;
2014 if (rval && whichfork == XFS_DATA_FORK)
2015 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
2016 return rval;
2017}
2018
2019/*
2020 * Extent tree manipulation functions used during allocation.
2021 */
2022
2023/*
435 * Convert a delayed allocation to a real allocation. 2024 * Convert a delayed allocation to a real allocation.
436 */ 2025 */
437STATIC int /* error */ 2026STATIC int /* error */
@@ -1894,6 +3483,10 @@ done:
1894} 3483}
1895 3484
1896/* 3485/*
3486 * Functions used in the extent read, allocate and remove paths
3487 */
3488
3489/*
1897 * Adjust the size of the new extent based on di_extsize and rt extsize. 3490 * Adjust the size of the new extent based on di_extsize and rt extsize.
1898 */ 3491 */
1899STATIC int 3492STATIC int
@@ -2666,1628 +4259,6 @@ xfs_bmap_alloc(
2666} 4259}
2667 4260
/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 *
 * Returns 0 or an error; on success *logflagsp tells the caller which
 * inode fields must be logged.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	/* The incore root must be level 1 with a single record ... */
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	/* ... whose single pointer names the only leaf block. */
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
		return error;
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	/*
	 * Queue the leaf block for freeing, adjust inode block count and
	 * quota, and invalidate the buffer so recovery won't replay it.
	 */
	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	/* Drop the cursor's reference to the now-invalidated buffer. */
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	/* Give back the incore root and switch the fork to extents format. */
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
2727
/*
 * Called by xfs_bmapi to update file extent records and the btree
 * after removing space (or undoing a delayed allocation).
 *
 * "del" must lie entirely within the extent at index *idx; depending on
 * how it overlaps, the existing record is deleted, trimmed at either
 * end, or split into two records.  Real allocations are queued on
 * "flist" for freeing (or returned to the rt bitmap), and block counts,
 * quota and delalloc reservations are adjusted to match.
 */
STATIC int				/* error */
xfs_bmap_del_extent(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_trans_t		*tp,	/* current transaction pointer */
	xfs_extnum_t		*idx,	/* extent number to update/delete */
	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
	xfs_btree_cur_t		*cur,	/* if null, not a btree */
	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
	int			*logflagsp, /* inode logging flags */
	int			whichfork) /* data or attr fork */
{
	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
	xfs_fsblock_t		del_endblock=0;	/* first block past del */
	xfs_fileoff_t		del_endoff;	/* first offset past del */
	int			delay;	/* current block is delayed allocated */
	int			do_fx;	/* free extent at end of routine */
	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
	int			error;	/* error return value */
	int			flags;	/* inode logging flags */
	xfs_bmbt_irec_t		got;	/* current extent entry */
	xfs_fileoff_t		got_endoff;	/* first offset past got */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_mount_t		*mp;	/* mount structure */
	xfs_filblks_t		nblks;	/* quota/sb block count */
	xfs_bmbt_irec_t		new;	/* new record to be inserted */
	/* REFERENCED */
	uint			qfield;	/* quota field to update */
	xfs_filblks_t		temp;	/* for indirect length calculations */
	xfs_filblks_t		temp2;	/* for indirect length calculations */
	int			state = 0;

	XFS_STATS_INC(xs_del_exlist);

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
		(uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(del->br_blockcount > 0);
	ep = xfs_iext_get_ext(ifp, *idx);
	xfs_bmbt_get_all(ep, &got);
	/* del must be fully contained within got. */
	ASSERT(got.br_startoff <= del->br_startoff);
	del_endoff = del->br_startoff + del->br_blockcount;
	got_endoff = got.br_startoff + got.br_blockcount;
	ASSERT(got_endoff >= del_endoff);
	delay = isnullstartblock(got.br_startblock);
	ASSERT(isnullstartblock(del->br_startblock) == delay);
	flags = 0;
	qfield = 0;
	error = 0;
	/*
	 * If deleting a real allocation, must free up the disk space.
	 */
	if (!delay) {
		flags = XFS_ILOG_CORE;
		/*
		 * Realtime allocation.  Free it and record di_nblocks update.
		 */
		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
			xfs_fsblock_t	bno;
			xfs_filblks_t	len;

			/* Realtime extents are rextsize-aligned. */
			ASSERT(do_mod(del->br_blockcount,
				      mp->m_sb.sb_rextsize) == 0);
			ASSERT(do_mod(del->br_startblock,
				      mp->m_sb.sb_rextsize) == 0);
			bno = del->br_startblock;
			len = del->br_blockcount;
			do_div(bno, mp->m_sb.sb_rextsize);
			do_div(len, mp->m_sb.sb_rextsize);
			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
			if (error)
				goto done;
			do_fx = 0;
			nblks = len * mp->m_sb.sb_rextsize;
			qfield = XFS_TRANS_DQ_RTBCOUNT;
		}
		/*
		 * Ordinary allocation.
		 */
		else {
			do_fx = 1;
			nblks = del->br_blockcount;
			qfield = XFS_TRANS_DQ_BCOUNT;
		}
		/*
		 * Set up del_endblock and cur for later.
		 */
		del_endblock = del->br_startblock + del->br_blockcount;
		if (cur) {
			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
					got.br_startblock, got.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		}
		da_old = da_new = 0;
	} else {
		/* Delayed allocation: only the indlen reservation changes. */
		da_old = startblockval(got.br_startblock);
		da_new = 0;
		nblks = 0;
		do_fx = 0;
	}
	/*
	 * Set flag value to use in switch statement.
	 * Left-contig is 2, right-contig is 1.
	 * (3 = del matches got exactly, 0 = del is in the middle of got.)
	 */
	switch (((got.br_startoff == del->br_startoff) << 1) |
		(got_endoff == del_endoff)) {
	case 3:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, *idx, 1,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
		--*idx;
		if (delay)
			break;

		XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
		flags |= XFS_ILOG_CORE;
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_btree_delete(cur, &i)))
			goto done;
		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		break;

	case 2:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, del_endoff);
		temp = got.br_blockcount - del->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Shrink the indlen reservation for the remainder. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		xfs_bmbt_set_startblock(ep, del_endblock);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 1:
		/*
		 * Deleting the last part of the extent.
		 */
		temp = got.br_blockcount - del->br_blockcount;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Shrink the indlen reservation for the remainder. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 0:
		/*
		 * Deleting the middle of the extent.
		 * Trim "got" down to the left piece and insert "new"
		 * for the right piece.
		 */
		temp = del->br_startoff - got.br_startoff;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		new.br_startoff = del_endoff;
		temp2 = got_endoff - del_endoff;
		new.br_blockcount = temp2;
		new.br_state = got.br_state;
		if (!delay) {
			new.br_startblock = del_endblock;
			flags |= XFS_ILOG_CORE;
			if (cur) {
				if ((error = xfs_bmbt_update(cur,
						got.br_startoff,
						got.br_startblock, temp,
						got.br_state)))
					goto done;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto done;
				cur->bc_rec.b = new;
				error = xfs_btree_insert(cur, &i);
				if (error && error != ENOSPC)
					goto done;
				/*
				 * If get no-space back from btree insert,
				 * it tried a split, and we have a zero
				 * block reservation.
				 * Fix up our state and return the error.
				 */
				if (error == ENOSPC) {
					/*
					 * Reset the cursor, don't trust
					 * it after any insert operation.
					 */
					if ((error = xfs_bmbt_lookup_eq(cur,
							got.br_startoff,
							got.br_startblock,
							temp, &i)))
						goto done;
					XFS_WANT_CORRUPTED_GOTO(i == 1, done);
					/*
					 * Update the btree record back
					 * to the original value.
					 */
					if ((error = xfs_bmbt_update(cur,
							got.br_startoff,
							got.br_startblock,
							got.br_blockcount,
							got.br_state)))
						goto done;
					/*
					 * Reset the extent record back
					 * to the original value.
					 */
					xfs_bmbt_set_blockcount(ep,
						got.br_blockcount);
					flags = 0;
					error = XFS_ERROR(ENOSPC);
					goto done;
				}
				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
			} else
				flags |= xfs_ilog_fext(whichfork);
			XFS_IFORK_NEXT_SET(ip, whichfork,
				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
		} else {
			ASSERT(whichfork == XFS_DATA_FORK);
			/*
			 * Split the old indlen reservation between the two
			 * remaining delalloc pieces, then trim the pair back
			 * down until they fit within the old reservation.
			 */
			temp = xfs_bmap_worst_indlen(ip, temp);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			temp2 = xfs_bmap_worst_indlen(ip, temp2);
			new.br_startblock = nullstartblock((int)temp2);
			da_new = temp + temp2;
			while (da_new > da_old) {
				if (temp) {
					temp--;
					da_new--;
					xfs_bmbt_set_startblock(ep,
						nullstartblock((int)temp));
				}
				if (da_new == da_old)
					break;
				if (temp2) {
					temp2--;
					da_new--;
					new.br_startblock =
						nullstartblock((int)temp2);
				}
			}
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		xfs_iext_insert(ip, *idx + 1, 1, &new, state);
		++*idx;
		break;
	}
	/*
	 * If we need to, add to list of extents to delete.
	 */
	if (do_fx)
		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
			mp);
	/*
	 * Adjust inode # blocks in the file.
	 */
	if (nblks)
		ip->i_d.di_nblocks -= nblks;
	/*
	 * Adjust quota data.
	 */
	if (qfield)
		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

	/*
	 * Account for change in delayed indirect blocks.
	 * Nothing to do for disk quota accounting here.
	 */
	ASSERT(da_old >= da_new);
	if (da_old > da_new) {
		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
			(int64_t)(da_old - da_new), 0);
	}
done:
	*logflagsp = flags;
	return error;
}
3049
3050/*
3051 * Remove the entry "free" from the free item list. Prev points to the
3052 * previous entry, unless "free" is the head of the list.
3053 */
3054STATIC void
3055xfs_bmap_del_free(
3056 xfs_bmap_free_t *flist, /* free item list header */
3057 xfs_bmap_free_item_t *prev, /* previous item on list, if any */
3058 xfs_bmap_free_item_t *free) /* list item to be freed */
3059{
3060 if (prev)
3061 prev->xbfi_next = free->xbfi_next;
3062 else
3063 flist->xbf_first = free->xbfi_next;
3064 flist->xbf_count--;
3065 kmem_zone_free(xfs_bmap_free_item_zone, free);
3066}
3067
/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child
 * block holding all the current extent records.  On success *curp holds
 * a cursor on the new btree and *logflagsp the inode fields to log.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	xfs_buf_t		*abp;		/* buffer for ablock */
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_rec_t		*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
	int			error;		/* error return value */
	xfs_extnum_t		i, cnt;		/* extent record index */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_bmbt_key_t		*kp;		/* root block key pointer */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* number of file extents */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	block->bb_level = cpu_to_be16(1);
	block->bb_numrecs = cpu_to_be16(1);
	block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
	block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);

	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	mp = ip->i_mount;
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.firstblock = *firstblock;
	cur->bc_private.b.flist = flist;
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	args.firstblock = *firstblock;
	/*
	 * Pick an allocation target: near the inode if nothing has been
	 * allocated yet, otherwise at/after the first allocated block so
	 * the AG locking order is preserved.
	 */
	if (*firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (flist->xbf_low) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = *firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = *firstblock;
	}
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		/* Undo the incore root and drop the cursor on failure. */
		xfs_iroot_realloc(ip, -1, whichfork);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(*firstblock == NULLFSBLOCK ||
	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
	       (flist->xbf_low &&
		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	ablock->bb_level = 0;
	ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
	ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	/* Copy all real (non-delalloc) extent records into the leaf. */
	for (cnt = i = 0; i < nextents; i++) {
		ep = xfs_iext_get_ext(ifp, i);
		if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
			arp->l0 = cpu_to_be64(ep->l0);
			arp->l1 = cpu_to_be64(ep->l1);
			arp++; cnt++;
		}
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}
3207
3208/*
3209 * Calculate the default attribute fork offset for newly created inodes.
3210 */
3211uint
3212xfs_default_attroffset(
3213 struct xfs_inode *ip)
3214{
3215 struct xfs_mount *mp = ip->i_mount;
3216 uint offset;
3217
3218 if (mp->m_sb.sb_inodesize == 256) {
3219 offset = XFS_LITINO(mp) -
3220 XFS_BMDR_SPACE_CALC(MINABTPTRS);
3221 } else {
3222 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
3223 }
3224
3225 ASSERT(offset < XFS_LITINO(mp));
3226 return offset;
3227}
3228
3229/*
3230 * Helper routine to reset inode di_forkoff field when switching
3231 * attribute fork from local to extent format - we reset it where
3232 * possible to make space available for inline data fork extents.
3233 */
3234STATIC void
3235xfs_bmap_forkoff_reset(
3236 xfs_mount_t *mp,
3237 xfs_inode_t *ip,
3238 int whichfork)
3239{
3240 if (whichfork == XFS_ATTR_FORK &&
3241 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
3242 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
3243 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
3244 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
3245
3246 if (dfl_forkoff > ip->i_d.di_forkoff)
3247 ip->i_d.di_forkoff = dfl_forkoff;
3248 }
3249}
3250
/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 *
 * If the fork holds any data, one block is allocated and "init_fn" is
 * called to format it and copy the inline data in; the fork then gets a
 * single extent record pointing at that block.  An empty fork is simply
 * switched to extents format.  *logflagsp returns the fields to log.
 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error;		/* error return value */
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	flags = 0;
	error = 0;
	if (ifp->if_bytes) {
		xfs_alloc_arg_t	args;	/* allocation arguments */
		xfs_buf_t	*bp;	/* buffer for extent block */
		xfs_bmbt_rec_host_t *ep;/* extent record pointer */

		ASSERT((ifp->if_flags &
			(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
		memset(&args, 0, sizeof(args));
		args.tp = tp;
		args.mp = ip->i_mount;
		args.firstblock = *firstblock;
		/*
		 * Allocate a block.  We know we need only one, since the
		 * file currently fits in an inode.
		 */
		if (*firstblock == NULLFSBLOCK) {
			/* Nothing allocated yet: aim near the inode. */
			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
			args.type = XFS_ALLOCTYPE_START_BNO;
		} else {
			args.fsbno = *firstblock;
			args.type = XFS_ALLOCTYPE_NEAR_BNO;
		}
		args.total = total;
		args.minlen = args.maxlen = args.prod = 1;
		error = xfs_alloc_vextent(&args);
		if (error)
			goto done;

		/* Can't fail, the space was reserved. */
		ASSERT(args.fsbno != NULLFSBLOCK);
		ASSERT(args.len == 1);
		*firstblock = args.fsbno;
		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

		/* initialise the block and copy the data */
		init_fn(bp, ip, ifp);

		/* account for the change in fork size and log everything */
		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
		/* Replace the inline data with a single extent record. */
		xfs_iext_add(ifp, 0, 1);
		ep = xfs_iext_get_ext(ifp, 0);
		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
		trace_xfs_bmap_post_update(ip, 0,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
				_THIS_IP_);
		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
		ip->i_d.di_nblocks = 1;
		xfs_trans_mod_dquot_byino(tp, ip,
			XFS_TRANS_DQ_BCOUNT, 1L);
		flags |= xfs_ilog_fext(whichfork);
	} else {
		/* Empty fork: just flip the format, nothing to copy. */
		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
		xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
	}
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	flags |= XFS_ILOG_CORE;
done:
	*logflagsp = flags;
	return error;
}
3346
3347/*
3348 * Search the extent records for the entry containing block bno.
3349 * If bno lies in a hole, point to the next entry. If bno lies
3350 * past eof, *eofp will be set, and *prevp will contain the last
3351 * entry (null if none). Else, *lastxp will be set to the index
3352 * of the found entry; *gotp will contain the entry.
3353 */
3354STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
3355xfs_bmap_search_multi_extents(
3356 xfs_ifork_t *ifp, /* inode fork pointer */
3357 xfs_fileoff_t bno, /* block number searched for */
3358 int *eofp, /* out: end of file found */
3359 xfs_extnum_t *lastxp, /* out: last extent index */
3360 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
3361 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
3362{
3363 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
3364 xfs_extnum_t lastx; /* last extent index */
3365
3366 /*
3367 * Initialize the extent entry structure to catch access to
3368 * uninitialized br_startblock field.
3369 */
3370 gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
3371 gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
3372 gotp->br_state = XFS_EXT_INVALID;
3373#if XFS_BIG_BLKNOS
3374 gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
3375#else
3376 gotp->br_startblock = 0xffffa5a5;
3377#endif
3378 prevp->br_startoff = NULLFILEOFF;
3379
3380 ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
3381 if (lastx > 0) {
3382 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
3383 }
3384 if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
3385 xfs_bmbt_get_all(ep, gotp);
3386 *eofp = 0;
3387 } else {
3388 if (lastx > 0) {
3389 *gotp = *prevp;
3390 }
3391 *eofp = 1;
3392 ep = NULL;
3393 }
3394 *lastxp = lastx;
3395 return ep;
3396}
3397
3398/*
3399 * Search the extents list for the inode, for the extent containing bno.
3400 * If bno lies in a hole, point to the next entry. If bno lies past eof,
3401 * *eofp will be set, and *prevp will contain the last entry (null if none).
3402 * Else, *lastxp will be set to the index of the found
3403 * entry; *gotp will contain the entry.
3404 */
3405STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
3406xfs_bmap_search_extents(
3407 xfs_inode_t *ip, /* incore inode pointer */
3408 xfs_fileoff_t bno, /* block number searched for */
3409 int fork, /* data or attr fork */
3410 int *eofp, /* out: end of file found */
3411 xfs_extnum_t *lastxp, /* out: last extent index */
3412 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
3413 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
3414{
3415 xfs_ifork_t *ifp; /* inode fork pointer */
3416 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
3417
3418 XFS_STATS_INC(xs_look_exlist);
3419 ifp = XFS_IFORK_PTR(ip, fork);
3420
3421 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
3422
3423 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
3424 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
3425 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
3426 "Access to block zero in inode %llu "
3427 "start_block: %llx start_off: %llx "
3428 "blkcnt: %llx extent-state: %x lastx: %x\n",
3429 (unsigned long long)ip->i_ino,
3430 (unsigned long long)gotp->br_startblock,
3431 (unsigned long long)gotp->br_startoff,
3432 (unsigned long long)gotp->br_blockcount,
3433 gotp->br_state, *lastxp);
3434 *lastxp = NULLEXTNUM;
3435 *eofp = 1;
3436 return NULL;
3437 }
3438 return ep;
3439}
3440
3441/*
3442 * Compute the worst-case number of indirect blocks that will be used
3443 * for ip's delayed extent of length "len".
3444 */
3445STATIC xfs_filblks_t
3446xfs_bmap_worst_indlen(
3447 xfs_inode_t *ip, /* incore inode pointer */
3448 xfs_filblks_t len) /* delayed extent length */
3449{
3450 int level; /* btree level number */
3451 int maxrecs; /* maximum record count at this level */
3452 xfs_mount_t *mp; /* mount structure */
3453 xfs_filblks_t rval; /* return value */
3454
3455 mp = ip->i_mount;
3456 maxrecs = mp->m_bmap_dmxr[0];
3457 for (level = 0, rval = 0;
3458 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
3459 level++) {
3460 len += maxrecs - 1;
3461 do_div(len, maxrecs);
3462 rval += len;
3463 if (len == 1)
3464 return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
3465 level - 1;
3466 if (level == 0)
3467 maxrecs = mp->m_bmap_dmxr[1];
3468 }
3469 return rval;
3470}
3471
/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 *
 * Reserves log/quota space, computes a fork offset appropriate to the
 * data fork format, allocates the incore attr fork, converts the data
 * fork layout if the new offset requires it, and enables the superblock
 * attr/attr2 feature bits on first use.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			committed;	/* xaction was committed */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
		goto error0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
		return error;
	}
	/*
	 * Re-check under the lock; if the fork appeared meanwhile, bail
	 * out (error is still 0 here, so this returns success).
	 */
	if (XFS_IFORK_Q(ip))
		goto error1;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* Pick the attr fork offset based on the data fork format. */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = XFS_ERROR(EINVAL);
		goto error1;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	/*
	 * Converting the data fork may move it to make room; the helper
	 * for the current format does whatever reshuffling is needed.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto error2;
	/*
	 * First attr on this filesystem (or first attr2): turn on the
	 * superblock feature bits under m_sb_lock and log the sb change.
	 */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		__int64_t sbfields = 0;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			sbfields |= XFS_SB_VERSIONNUM;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
		}
		if (sbfields) {
			spin_unlock(&mp->m_sb_lock);
			xfs_mod_sb(tp, sbfields);
		} else
			spin_unlock(&mp->m_sb_lock);
	}

	error = xfs_bmap_finish(&tp, &flist, &committed);
	if (error)
		goto error2;
	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
error2:
	xfs_bmap_cancel(&flist);
error1:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
error0:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	return error;
}
3606
3607/*
3608 * Add the extent to the list of extents to be free at transaction end.
3609 * The list is maintained sorted (by block number).
3610 */
3611/* ARGSUSED */
3612void
3613xfs_bmap_add_free(
3614 xfs_fsblock_t bno, /* fs block number of extent */
3615 xfs_filblks_t len, /* length of extent */
3616 xfs_bmap_free_t *flist, /* list of extents */
3617 xfs_mount_t *mp) /* mount point structure */
3618{
3619 xfs_bmap_free_item_t *cur; /* current (next) element */
3620 xfs_bmap_free_item_t *new; /* new element */
3621 xfs_bmap_free_item_t *prev; /* previous element */
3622#ifdef DEBUG
3623 xfs_agnumber_t agno;
3624 xfs_agblock_t agbno;
3625
3626 ASSERT(bno != NULLFSBLOCK);
3627 ASSERT(len > 0);
3628 ASSERT(len <= MAXEXTLEN);
3629 ASSERT(!isnullstartblock(bno));
3630 agno = XFS_FSB_TO_AGNO(mp, bno);
3631 agbno = XFS_FSB_TO_AGBNO(mp, bno);
3632 ASSERT(agno < mp->m_sb.sb_agcount);
3633 ASSERT(agbno < mp->m_sb.sb_agblocks);
3634 ASSERT(len < mp->m_sb.sb_agblocks);
3635 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
3636#endif
3637 ASSERT(xfs_bmap_free_item_zone != NULL);
3638 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
3639 new->xbfi_startblock = bno;
3640 new->xbfi_blockcount = (xfs_extlen_t)len;
3641 for (prev = NULL, cur = flist->xbf_first;
3642 cur != NULL;
3643 prev = cur, cur = cur->xbfi_next) {
3644 if (cur->xbfi_startblock >= bno)
3645 break;
3646 }
3647 if (prev)
3648 prev->xbfi_next = new;
3649 else
3650 flist->xbf_first = new;
3651 new->xbfi_next = cur;
3652 flist->xbf_count++;
3653}
3654
3655/*
3656 * Compute and fill in the value of the maximum depth of a bmap btree
3657 * in this filesystem. Done once, during mount.
3658 */
3659void
3660xfs_bmap_compute_maxlevels(
3661 xfs_mount_t *mp, /* file system mount structure */
3662 int whichfork) /* data or attr fork */
3663{
3664 int level; /* btree level */
3665 uint maxblocks; /* max blocks at this level */
3666 uint maxleafents; /* max leaf entries possible */
3667 int maxrootrecs; /* max records in root block */
3668 int minleafrecs; /* min records in leaf block */
3669 int minnoderecs; /* min records in node block */
3670 int sz; /* root block size */
3671
3672 /*
3673 * The maximum number of extents in a file, hence the maximum
3674 * number of leaf entries, is controlled by the type of di_nextents
3675 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
3676 * (a signed 16-bit number, xfs_aextnum_t).
3677 *
3678 * Note that we can no longer assume that if we are in ATTR1 that
3679 * the fork offset of all the inodes will be
3680 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
3681 * with ATTR2 and then mounted back with ATTR1, keeping the
3682 * di_forkoff's fixed but probably at various positions. Therefore,
3683 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
3684 * of a minimum size available.
3685 */
3686 if (whichfork == XFS_DATA_FORK) {
3687 maxleafents = MAXEXTNUM;
3688 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
3689 } else {
3690 maxleafents = MAXAEXTNUM;
3691 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
3692 }
3693 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
3694 minleafrecs = mp->m_bmap_dmnr[0];
3695 minnoderecs = mp->m_bmap_dmnr[1];
3696 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
3697 for (level = 1; maxblocks > 1; level++) {
3698 if (maxblocks <= maxrootrecs)
3699 maxblocks = 1;
3700 else
3701 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
3702 }
3703 mp->m_bm_maxlevels[whichfork] = level;
3704}
3705
/*
 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
 * caller.  Frees all the extents that need freeing, which must be done
 * last due to locking considerations.  We never free any extents in
 * the first transaction.
 *
 * Return 1 if the given transaction was committed and a new one
 * started, and 0 otherwise in the committed parameter.
 */
int						/* error */
xfs_bmap_finish(
	xfs_trans_t		**tp,		/* transaction pointer addr */
	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
	int			*committed)	/* xact committed or not */
{
	xfs_efd_log_item_t	*efd;		/* extent free data */
	xfs_efi_log_item_t	*efi;		/* extent free intention */
	int			error;		/* error return value */
	xfs_bmap_free_item_t	*free;		/* free extent item */
	unsigned int		logres;		/* new log reservation */
	unsigned int		logcount;	/* new log count */
	xfs_mount_t		*mp;		/* filesystem mount structure */
	xfs_bmap_free_item_t	*next;		/* next item on free list */
	xfs_trans_t		*ntp;		/* new transaction pointer */

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
	/* Nothing queued to free: no commit done, tell the caller so. */
	if (flist->xbf_count == 0) {
		*committed = 0;
		return 0;
	}
	ntp = *tp;
	/*
	 * Log an EFI (extent free intention) covering every queued extent
	 * in the first transaction, before that transaction commits.
	 */
	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
	for (free = flist->xbf_first; free; free = free->xbfi_next)
		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
			free->xbfi_blockcount);
	/*
	 * Capture the log reservation parameters before committing so the
	 * rolled transaction can be re-reserved identically below.
	 */
	logres = ntp->t_log_res;
	logcount = ntp->t_log_count;
	ntp = xfs_trans_dup(*tp);
	error = xfs_trans_commit(*tp, 0);
	*tp = ntp;
	*committed = 1;
	/*
	 * We have a new transaction, so we should return committed=1,
	 * even though we're returning an error.
	 */
	if (error)
		return error;

	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(ntp->t_ticket);

	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
			logcount)))
		return error;
	/*
	 * Do the actual frees in the rolled transaction, logging each one
	 * against the EFD (extent free done) as it completes.
	 */
	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
	for (free = flist->xbf_first; free != NULL; free = next) {
		next = free->xbfi_next;
		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
				free->xbfi_blockcount))) {
			/*
			 * The bmap free list will be cleaned up at a
			 * higher level. The EFI will be canceled when
			 * this transaction is aborted.
			 * Need to force shutdown here to make sure it
			 * happens, since this transaction may not be
			 * dirty yet.
			 */
			mp = ntp->t_mountp;
			if (!XFS_FORCED_SHUTDOWN(mp))
				xfs_force_shutdown(mp,
						   (error == EFSCORRUPTED) ?
						   SHUTDOWN_CORRUPT_INCORE :
						   SHUTDOWN_META_IO_ERROR);
			return error;
		}
		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
			free->xbfi_blockcount);
		xfs_bmap_del_free(flist, NULL, free);
	}
	return 0;
}
3790
3791/*
3792 * Free up any items left in the list.
3793 */
3794void
3795xfs_bmap_cancel(
3796 xfs_bmap_free_t *flist) /* list of bmap_free_items */
3797{
3798 xfs_bmap_free_item_t *free; /* free list item */
3799 xfs_bmap_free_item_t *next;
3800
3801 if (flist->xbf_count == 0)
3802 return;
3803 ASSERT(flist->xbf_first != NULL);
3804 for (free = flist->xbf_first; free; free = next) {
3805 next = free->xbfi_next;
3806 xfs_bmap_del_free(flist, NULL, free);
3807 }
3808 ASSERT(flist->xbf_count == 0);
3809}
3810
3811/*
3812 * Returns the file-relative block number of the first unused block(s)
3813 * in the file with at least "len" logically contiguous blocks free.
3814 * This is the lowest-address hole if the file has holes, else the first block
3815 * past the end of file.
3816 * Return 0 if the file is currently local (in-inode).
3817 */
3818int /* error */
3819xfs_bmap_first_unused(
3820 xfs_trans_t *tp, /* transaction pointer */
3821 xfs_inode_t *ip, /* incore inode */
3822 xfs_extlen_t len, /* size of hole to find */
3823 xfs_fileoff_t *first_unused, /* unused block */
3824 int whichfork) /* data or attr fork */
3825{
3826 int error; /* error return value */
3827 int idx; /* extent record index */
3828 xfs_ifork_t *ifp; /* inode fork pointer */
3829 xfs_fileoff_t lastaddr; /* last block number seen */
3830 xfs_fileoff_t lowest; /* lowest useful block */
3831 xfs_fileoff_t max; /* starting useful block */
3832 xfs_fileoff_t off; /* offset for this block */
3833 xfs_extnum_t nextents; /* number of extent entries */
3834
3835 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
3836 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
3837 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
3838 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
3839 *first_unused = 0;
3840 return 0;
3841 }
3842 ifp = XFS_IFORK_PTR(ip, whichfork);
3843 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
3844 (error = xfs_iread_extents(tp, ip, whichfork)))
3845 return error;
3846 lowest = *first_unused;
3847 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3848 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
3849 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
3850 off = xfs_bmbt_get_startoff(ep);
3851 /*
3852 * See if the hole before this extent will work.
3853 */
3854 if (off >= lowest + len && off - max >= len) {
3855 *first_unused = max;
3856 return 0;
3857 }
3858 lastaddr = off + xfs_bmbt_get_blockcount(ep);
3859 max = XFS_FILEOFF_MAX(lastaddr, lowest);
3860 }
3861 *first_unused = max;
3862 return 0;
3863}
3864
3865/*
3866 * Returns the file-relative block number of the last block + 1 before
3867 * last_block (input value) in the file.
3868 * This is not based on i_size, it is based on the extent records.
3869 * Returns 0 for local files, as they do not have extent records.
3870 */
3871int /* error */
3872xfs_bmap_last_before(
3873 xfs_trans_t *tp, /* transaction pointer */
3874 xfs_inode_t *ip, /* incore inode */
3875 xfs_fileoff_t *last_block, /* last block */
3876 int whichfork) /* data or attr fork */
3877{
3878 xfs_fileoff_t bno; /* input file offset */
3879 int eof; /* hit end of file */
3880 xfs_bmbt_rec_host_t *ep; /* pointer to last extent */
3881 int error; /* error return value */
3882 xfs_bmbt_irec_t got; /* current extent value */
3883 xfs_ifork_t *ifp; /* inode fork pointer */
3884 xfs_extnum_t lastx; /* last extent used */
3885 xfs_bmbt_irec_t prev; /* previous extent value */
3886
3887 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
3888 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
3889 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
3890 return XFS_ERROR(EIO);
3891 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
3892 *last_block = 0;
3893 return 0;
3894 }
3895 ifp = XFS_IFORK_PTR(ip, whichfork);
3896 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
3897 (error = xfs_iread_extents(tp, ip, whichfork)))
3898 return error;
3899 bno = *last_block - 1;
3900 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
3901 &prev);
3902 if (eof || xfs_bmbt_get_startoff(ep) > bno) {
3903 if (prev.br_startoff == NULLFILEOFF)
3904 *last_block = 0;
3905 else
3906 *last_block = prev.br_startoff + prev.br_blockcount;
3907 }
3908 /*
3909 * Otherwise *last_block is already the right answer.
3910 */
3911 return 0;
3912}
3913
3914STATIC int
3915xfs_bmap_last_extent(
3916 struct xfs_trans *tp,
3917 struct xfs_inode *ip,
3918 int whichfork,
3919 struct xfs_bmbt_irec *rec,
3920 int *is_empty)
3921{
3922 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3923 int error;
3924 int nextents;
3925
3926 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3927 error = xfs_iread_extents(tp, ip, whichfork);
3928 if (error)
3929 return error;
3930 }
3931
3932 nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
3933 if (nextents == 0) {
3934 *is_empty = 1;
3935 return 0;
3936 }
3937
3938 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
3939 *is_empty = 0;
3940 return 0;
3941}
3942
3943/*
3944 * Check the last inode extent to determine whether this allocation will result
3945 * in blocks being allocated at the end of the file. When we allocate new data
3946 * blocks at the end of the file which do not start at the previous data block,
3947 * we will try to align the new blocks at stripe unit boundaries.
3948 *
3949 * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
3950 * at, or past the EOF.
3951 */
3952STATIC int
3953xfs_bmap_isaeof(
3954 struct xfs_bmalloca *bma,
3955 int whichfork)
3956{
3957 struct xfs_bmbt_irec rec;
3958 int is_empty;
3959 int error;
3960
3961 bma->aeof = 0;
3962 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
3963 &is_empty);
3964 if (error || is_empty)
3965 return error;
3966
3967 /*
3968 * Check if we are allocation or past the last extent, or at least into
3969 * the last delayed allocated extent.
3970 */
3971 bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
3972 (bma->offset >= rec.br_startoff &&
3973 isnullstartblock(rec.br_startblock));
3974 return 0;
3975}
3976
3977/*
3978 * Check if the endoff is outside the last extent. If so the caller will grow
3979 * the allocation to a stripe unit boundary. All offsets are considered outside
3980 * the end of file for an empty fork, so 1 is returned in *eof in that case.
3981 */
3982int
3983xfs_bmap_eof(
3984 struct xfs_inode *ip,
3985 xfs_fileoff_t endoff,
3986 int whichfork,
3987 int *eof)
3988{
3989 struct xfs_bmbt_irec rec;
3990 int error;
3991
3992 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
3993 if (error || *eof)
3994 return error;
3995
3996 *eof = endoff >= rec.br_startoff + rec.br_blockcount;
3997 return 0;
3998}
3999
4000/*
4001 * Returns the file-relative block number of the first block past eof in
4002 * the file. This is not based on i_size, it is based on the extent records.
4003 * Returns 0 for local files, as they do not have extent records.
4004 */
4005int
4006xfs_bmap_last_offset(
4007 struct xfs_trans *tp,
4008 struct xfs_inode *ip,
4009 xfs_fileoff_t *last_block,
4010 int whichfork)
4011{
4012 struct xfs_bmbt_irec rec;
4013 int is_empty;
4014 int error;
4015
4016 *last_block = 0;
4017
4018 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
4019 return 0;
4020
4021 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
4022 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4023 return XFS_ERROR(EIO);
4024
4025 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
4026 if (error || is_empty)
4027 return error;
4028
4029 *last_block = rec.br_startoff + rec.br_blockcount;
4030 return 0;
4031}
4032
4033/*
4034 * Returns whether the selected fork of the inode has exactly one
4035 * block or not. For the data fork we check this matches di_size,
4036 * implying the file's range is 0..bsize-1.
4037 */
4038int /* 1=>1 block, 0=>otherwise */
4039xfs_bmap_one_block(
4040 xfs_inode_t *ip, /* incore inode */
4041 int whichfork) /* data or attr fork */
4042{
4043 xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */
4044 xfs_ifork_t *ifp; /* inode fork pointer */
4045 int rval; /* return value */
4046 xfs_bmbt_irec_t s; /* internal version of extent */
4047
4048#ifndef DEBUG
4049 if (whichfork == XFS_DATA_FORK)
4050 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
4051#endif /* !DEBUG */
4052 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
4053 return 0;
4054 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4055 return 0;
4056 ifp = XFS_IFORK_PTR(ip, whichfork);
4057 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
4058 ep = xfs_iext_get_ext(ifp, 0);
4059 xfs_bmbt_get_all(ep, &s);
4060 rval = s.br_startoff == 0 && s.br_blockcount == 1;
4061 if (rval && whichfork == XFS_DATA_FORK)
4062 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
4063 return rval;
4064}
4065
4066STATIC int
4067xfs_bmap_sanity_check(
4068 struct xfs_mount *mp,
4069 struct xfs_buf *bp,
4070 int level)
4071{
4072 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4073
4074 if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
4075 be16_to_cpu(block->bb_level) != level ||
4076 be16_to_cpu(block->bb_numrecs) == 0 ||
4077 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
4078 return 0;
4079 return 1;
4080}
4081
/*
 * Read in the extents to if_extents.
 * All inode fields are set up by caller, we just traverse the btree
 * and copy the records in. If the file system cannot contain unwritten
 * extents, the records are checked for no "state" flags.
 */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Attr fork records never carry state flags; check them below. */
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		/* Keep the buffer: it is the leftmost leaf we start from. */
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes. Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		/* More records on disk than the fork advertised: corrupt. */
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, 0),
			error0);
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return XFS_ERROR(EFSCORRUPTED);
}
4222
4223#ifdef DEBUG
4224/*
4225 * Add bmap trace insert entries for all the contents of the extent records.
4226 */
4227void
4228xfs_bmap_trace_exlist(
4229 xfs_inode_t *ip, /* incore inode pointer */
4230 xfs_extnum_t cnt, /* count of entries in the list */
4231 int whichfork, /* data or attr fork */
4232 unsigned long caller_ip)
4233{
4234 xfs_extnum_t idx; /* extent record index */
4235 xfs_ifork_t *ifp; /* inode fork pointer */
4236 int state = 0;
4237
4238 if (whichfork == XFS_ATTR_FORK)
4239 state |= BMAP_ATTRFORK;
4240
4241 ifp = XFS_IFORK_PTR(ip, whichfork);
4242 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4243 for (idx = 0; idx < cnt; idx++)
4244 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
4245}
4246
4247/*
4248 * Validate that the bmbt_irecs being returned from bmapi are valid
4249 * given the callers original parameters. Specifically check the
4250 * ranges of the returned irecs to ensure that they only extent beyond
4251 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
4252 */
4253STATIC void
4254xfs_bmap_validate_ret(
4255 xfs_fileoff_t bno,
4256 xfs_filblks_t len,
4257 int flags,
4258 xfs_bmbt_irec_t *mval,
4259 int nmap,
4260 int ret_nmap)
4261{
4262 int i; /* index to map values */
4263
4264 ASSERT(ret_nmap <= nmap);
4265
4266 for (i = 0; i < ret_nmap; i++) {
4267 ASSERT(mval[i].br_blockcount > 0);
4268 if (!(flags & XFS_BMAPI_ENTIRE)) {
4269 ASSERT(mval[i].br_startoff >= bno);
4270 ASSERT(mval[i].br_blockcount <= len);
4271 ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
4272 bno + len);
4273 } else {
4274 ASSERT(mval[i].br_startoff < bno + len);
4275 ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
4276 bno);
4277 }
4278 ASSERT(i == 0 ||
4279 mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
4280 mval[i].br_startoff);
4281 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
4282 mval[i].br_startblock != HOLESTARTBLOCK);
4283 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
4284 mval[i].br_state == XFS_EXT_UNWRITTEN);
4285 }
4286}
4287#endif /* DEBUG */
4288
4289
4290/*
4291 * Trim the returned map to the required bounds 4262 * Trim the returned map to the required bounds
4292 */ 4263 */
4293STATIC void 4264STATIC void
@@ -5151,6 +5122,328 @@ error0:
5151} 5122}
5152 5123
5153/* 5124/*
5125 * Called by xfs_bmapi to update file extent records and the btree
5126 * after removing space (or undoing a delayed allocation).
5127 */
STATIC int				/* error */
xfs_bmap_del_extent(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_trans_t		*tp,	/* current transaction pointer */
	xfs_extnum_t		*idx,	/* extent number to update/delete */
	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
	xfs_btree_cur_t		*cur,	/* if null, not a btree */
	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
	int			*logflagsp, /* inode logging flags */
	int			whichfork) /* data or attr fork */
{
	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
	xfs_fsblock_t		del_endblock=0;	/* first block past del */
	xfs_fileoff_t		del_endoff;	/* first offset past del */
	int			delay;	/* current block is delayed allocated */
	int			do_fx;	/* free extent at end of routine */
	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
	int			error;	/* error return value */
	int			flags;	/* inode logging flags */
	xfs_bmbt_irec_t		got;	/* current extent entry */
	xfs_fileoff_t		got_endoff;	/* first offset past got */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_mount_t		*mp;	/* mount structure */
	xfs_filblks_t		nblks;	/* quota/sb block count */
	xfs_bmbt_irec_t		new;	/* new record to be inserted */
	/* REFERENCED */
	uint			qfield;	/* quota field to update */
	xfs_filblks_t		temp;	/* for indirect length calculations */
	xfs_filblks_t		temp2;	/* for indirect length calculations */
	int			state = 0;

	XFS_STATS_INC(xs_del_exlist);

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
		(uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(del->br_blockcount > 0);
	ep = xfs_iext_get_ext(ifp, *idx);
	xfs_bmbt_get_all(ep, &got);
	/* "del" must be fully contained in the existing extent "got". */
	ASSERT(got.br_startoff <= del->br_startoff);
	del_endoff = del->br_startoff + del->br_blockcount;
	got_endoff = got.br_startoff + got.br_blockcount;
	ASSERT(got_endoff >= del_endoff);
	delay = isnullstartblock(got.br_startblock);
	ASSERT(isnullstartblock(del->br_startblock) == delay);
	flags = 0;
	qfield = 0;
	error = 0;
	/*
	 * If deleting a real allocation, must free up the disk space.
	 */
	if (!delay) {
		flags = XFS_ILOG_CORE;
		/*
		 * Realtime allocation.  Free it and record di_nblocks update.
		 */
		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
			xfs_fsblock_t	bno;
			xfs_filblks_t	len;

			/* Realtime extents must be whole rt-extent multiples. */
			ASSERT(do_mod(del->br_blockcount,
				      mp->m_sb.sb_rextsize) == 0);
			ASSERT(do_mod(del->br_startblock,
				      mp->m_sb.sb_rextsize) == 0);
			bno = del->br_startblock;
			len = del->br_blockcount;
			do_div(bno, mp->m_sb.sb_rextsize);
			do_div(len, mp->m_sb.sb_rextsize);
			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
			if (error)
				goto done;
			do_fx = 0;
			nblks = len * mp->m_sb.sb_rextsize;
			qfield = XFS_TRANS_DQ_RTBCOUNT;
		}
		/*
		 * Ordinary allocation.
		 */
		else {
			do_fx = 1;
			nblks = del->br_blockcount;
			qfield = XFS_TRANS_DQ_BCOUNT;
		}
		/*
		 * Set up del_endblock and cur for later.
		 */
		del_endblock = del->br_startblock + del->br_blockcount;
		if (cur) {
			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
					got.br_startblock, got.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		}
		da_old = da_new = 0;
	} else {
		/* Delayed allocation: no disk blocks or quota to adjust. */
		da_old = startblockval(got.br_startblock);
		da_new = 0;
		nblks = 0;
		do_fx = 0;
	}
	/*
	 * Set flag value to use in switch statement.
	 * Left-contig is 2, right-contig is 1.
	 */
	switch (((got.br_startoff == del->br_startoff) << 1) |
		(got_endoff == del_endoff)) {
	case 3:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, *idx, 1,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
		--*idx;
		if (delay)
			break;

		XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
		flags |= XFS_ILOG_CORE;
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_btree_delete(cur, &i)))
			goto done;
		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		break;

	case 2:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, del_endoff);
		temp = got.br_blockcount - del->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Re-derive the indirect reservation, capped at old. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		xfs_bmbt_set_startblock(ep, del_endblock);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 1:
		/*
		 * Deleting the last part of the extent.
		 */
		temp = got.br_blockcount - del->br_blockcount;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Re-derive the indirect reservation, capped at old. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 0:
		/*
		 * Deleting the middle of the extent.
		 * The existing record becomes the left piece, and "new"
		 * is inserted after it as the right piece.
		 */
		temp = del->br_startoff - got.br_startoff;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		new.br_startoff = del_endoff;
		temp2 = got_endoff - del_endoff;
		new.br_blockcount = temp2;
		new.br_state = got.br_state;
		if (!delay) {
			new.br_startblock = del_endblock;
			flags |= XFS_ILOG_CORE;
			if (cur) {
				if ((error = xfs_bmbt_update(cur,
						got.br_startoff,
						got.br_startblock, temp,
						got.br_state)))
					goto done;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto done;
				cur->bc_rec.b = new;
				error = xfs_btree_insert(cur, &i);
				if (error && error != ENOSPC)
					goto done;
				/*
				 * If get no-space back from btree insert,
				 * it tried a split, and we have a zero
				 * block reservation.
				 * Fix up our state and return the error.
				 */
				if (error == ENOSPC) {
					/*
					 * Reset the cursor, don't trust
					 * it after any insert operation.
					 */
					if ((error = xfs_bmbt_lookup_eq(cur,
							got.br_startoff,
							got.br_startblock,
							temp, &i)))
						goto done;
					XFS_WANT_CORRUPTED_GOTO(i == 1, done);
					/*
					 * Update the btree record back
					 * to the original value.
					 */
					if ((error = xfs_bmbt_update(cur,
							got.br_startoff,
							got.br_startblock,
							got.br_blockcount,
							got.br_state)))
						goto done;
					/*
					 * Reset the extent record back
					 * to the original value.
					 */
					xfs_bmbt_set_blockcount(ep,
						got.br_blockcount);
					flags = 0;
					error = XFS_ERROR(ENOSPC);
					goto done;
				}
				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
			} else
				flags |= xfs_ilog_fext(whichfork);
			XFS_IFORK_NEXT_SET(ip, whichfork,
				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
		} else {
			ASSERT(whichfork == XFS_DATA_FORK);
			temp = xfs_bmap_worst_indlen(ip, temp);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			temp2 = xfs_bmap_worst_indlen(ip, temp2);
			new.br_startblock = nullstartblock((int)temp2);
			da_new = temp + temp2;
			/*
			 * Trim the two new indirect reservations down until
			 * their sum no longer exceeds the old reservation,
			 * stealing alternately from each piece.
			 */
			while (da_new > da_old) {
				if (temp) {
					temp--;
					da_new--;
					xfs_bmbt_set_startblock(ep,
						nullstartblock((int)temp));
				}
				if (da_new == da_old)
					break;
				if (temp2) {
					temp2--;
					da_new--;
					new.br_startblock =
						nullstartblock((int)temp2);
				}
			}
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		xfs_iext_insert(ip, *idx + 1, 1, &new, state);
		++*idx;
		break;
	}
	/*
	 * If we need to, add to list of extents to delete.
	 */
	if (do_fx)
		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
			mp);
	/*
	 * Adjust inode # blocks in the file.
	 */
	if (nblks)
		ip->i_d.di_nblocks -= nblks;
	/*
	 * Adjust quota data.
	 */
	if (qfield)
		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

	/*
	 * Account for change in delayed indirect blocks.
	 * Nothing to do for disk quota accounting here.
	 */
	ASSERT(da_old >= da_new);
	if (da_old > da_new) {
		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
			(int64_t)(da_old - da_new), 0);
	}
done:
	*logflagsp = flags;
	return error;
}
5445
5446/*
5154 * Unmap (remove) blocks from a file. 5447 * Unmap (remove) blocks from a file.
5155 * If nexts is nonzero then the number of extents to remove is limited to 5448 * If nexts is nonzero then the number of extents to remove is limited to
5156 * that value. If not all extents in the block range can be removed then 5449 * that value. If not all extents in the block range can be removed then
@@ -5811,416 +6104,6 @@ xfs_getbmap(
5811 return error; 6104 return error;
5812} 6105}
5813 6106
5814#ifdef DEBUG
/*
 * xfs_bmap_get_bp -- find an already-held buffer for disk address @bno.
 *
 * Looks first in the btree cursor's per-level buffer stack, then walks
 * the buf log items attached to the cursor's transaction.  Returns the
 * matching buffer, or NULL when @cur is NULL or nothing matches.
 * DEBUG-only helper for the leaf-extent checker below.
 */
5815STATIC struct xfs_buf *
5816xfs_bmap_get_bp(
5817 struct xfs_btree_cur *cur,
5818 xfs_fsblock_t bno)
5819{
5820 struct xfs_log_item_desc *lidp;
5821 int i;
5822
5823 if (!cur)
5824 return NULL;
5825
 /* Cursor buffers are filled root-down; a NULL slot ends the stack. */
5826 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5827 if (!cur->bc_bufs[i])
5828 break;
5829 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
5830 return cur->bc_bufs[i];
5831 }
5832
5833 /* Chase down all the log items to see if the bp is there */
5834 list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
5835 struct xfs_buf_log_item *bip;
5836 bip = (struct xfs_buf_log_item *)lidp->lid_item;
5837 if (bip->bli_item.li_type == XFS_LI_BUF &&
5838 XFS_BUF_ADDR(bip->bli_buf) == bno)
5839 return bip->bli_buf;
5840 }
5841
5842 return NULL;
5843}
5844
/*
 * xfs_check_block -- sanity-check one interior (non-leaf) bmap btree block.
 *
 * Verifies that keys are strictly increasing and that no two child
 * pointers in the block are duplicates.  @root selects the incore root
 * addressing macro (with @sz = broot bytes) versus an on-disk block.
 * Panics on a duplicate pointer; key order is enforced via ASSERT, so
 * this is effectively a DEBUG-only checker.
 */
5845STATIC void
5846xfs_check_block(
5847 struct xfs_btree_block *block,
5848 xfs_mount_t *mp,
5849 int root,
5850 short sz)
5851{
5852 int i, j, dmxr;
5853 __be64 *pp, *thispa; /* pointer to block address */
5854 xfs_bmbt_key_t *prevp, *keyp;
5855
 /* Only interior blocks have keys/pointers to check. */
5856 ASSERT(be16_to_cpu(block->bb_level) > 0);
5857
5858 prevp = NULL;
5859 for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
5860 dmxr = mp->m_bmap_dmxr[0];
5861 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
5862
5863 if (prevp) {
5864 ASSERT(be64_to_cpu(prevp->br_startoff) <
5865 be64_to_cpu(keyp->br_startoff));
5866 }
5867 prevp = keyp;
5868
5869 /*
5870 * Compare the block numbers to see if there are dups.
5871 */
5872 if (root)
5873 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
5874 else
5875 pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
5876
 /* O(n^2) pairwise scan -- acceptable for a DEBUG-only check. */
5877 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
5878 if (root)
5879 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
5880 else
5881 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
5882 if (*thispa == *pp) {
5883 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
5884 __func__, j, i,
5885 (unsigned long long)be64_to_cpu(*thispa));
5886 panic("%s: ptrs are equal in node\n",
5887 __func__);
5888 }
5889 }
5890 }
5891}
5892
5893/*
5894 * Check that the extents for the inode ip are in the right order in all
5895 * btree leaves.
 *
 * Walks down the leftmost path of the fork's bmap btree (checking each
 * interior block via xfs_check_block()), then traverses the leaf level
 * left-to-right via the rightsib links, asserting that every extent
 * starts at or after the end of the previous one -- including across
 * leaf boundaries.  No-op unless the fork is in btree format.
 * Buffers not already held by @cur are read and released locally
 * (tracked by bp_release).  Panics on corruption; DEBUG-only.
5896 */
5897
5898STATIC void
5899xfs_bmap_check_leaf_extents(
5900 xfs_btree_cur_t *cur, /* btree cursor or null */
5901 xfs_inode_t *ip, /* incore inode pointer */
5902 int whichfork) /* data or attr fork */
5903{
5904 struct xfs_btree_block *block; /* current btree block */
5905 xfs_fsblock_t bno; /* block # of "block" */
5906 xfs_buf_t *bp; /* buffer for "block" */
5907 int error; /* error return value */
5908 xfs_extnum_t i=0, j; /* index into the extents list */
5909 xfs_ifork_t *ifp; /* fork structure */
5910 int level; /* btree level, for checking */
5911 xfs_mount_t *mp; /* file system mount structure */
5912 __be64 *pp; /* pointer to block address */
5913 xfs_bmbt_rec_t *ep; /* pointer to current extent */
5914 xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */
5915 xfs_bmbt_rec_t *nextp; /* pointer to next extent */
5916 int bp_release = 0; /* nonzero: we read bp here and must brelse it */
5917
5918 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
5919 return;
5920 }
5921
5922 bno = NULLFSBLOCK;
5923 mp = ip->i_mount;
5924 ifp = XFS_IFORK_PTR(ip, whichfork);
5925 block = ifp->if_broot;
5926 /*
5927 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
5928 */
5929 level = be16_to_cpu(block->bb_level);
5930 ASSERT(level > 0);
5931 xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
5932 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
5933 bno = be64_to_cpu(*pp);
5934
5935 ASSERT(bno != NULLDFSBNO);
5936 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
5937 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
5938
5939 /*
5940 * Go down the tree until leaf level is reached, following the first
5941 * pointer (leftmost) at each level.
5942 */
5943 while (level-- > 0) {
5944 /* See if buf is in cur first */
5945 bp_release = 0;
5946 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
5947 if (!bp) {
5948 bp_release = 1;
5949 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
5950 XFS_BMAP_BTREE_REF,
5951 &xfs_bmbt_buf_ops);
5952 if (error)
5953 goto error_norelse;
5954 }
5955 block = XFS_BUF_TO_BLOCK(bp);
5956 XFS_WANT_CORRUPTED_GOTO(
5957 xfs_bmap_sanity_check(mp, bp, level),
5958 error0);
5959 if (level == 0)
5960 break;
5961
5962 /*
5963 * Check this block for basic sanity (increasing keys and
5964 * no duplicate blocks).
5965 */
5966
5967 xfs_check_block(block, mp, 0, 0);
5968 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
5969 bno = be64_to_cpu(*pp);
5970 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
5971 if (bp_release) {
5972 bp_release = 0;
5973 xfs_trans_brelse(NULL, bp);
5974 }
5975 }
5976
5977 /*
5978 * Here with bp and block set to the leftmost leaf node in the tree.
5979 */
5980 i = 0;
5981
5982 /*
5983 * Loop over all leaf nodes checking that all extents are in the right order.
5984 */
5985 for (;;) {
5986 xfs_fsblock_t nextbno;
5987 xfs_extnum_t num_recs;
5988
5989
5990 num_recs = xfs_btree_get_numrecs(block);
5991
5992 /*
5993 * Read-ahead the next leaf block, if any.
5994 */
5995
5996 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
5997
5998 /*
5999 * Check all the extents to make sure they are OK.
6000 * If we had a previous block, the last entry should
6001 * conform with the first entry in this one.
6002 */
6003
6004 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
6005 if (i) {
6006 ASSERT(xfs_bmbt_disk_get_startoff(&last) +
6007 xfs_bmbt_disk_get_blockcount(&last) <=
6008 xfs_bmbt_disk_get_startoff(ep));
6009 }
 /* Within the leaf: each extent must end at or before the next starts. */
6010 for (j = 1; j < num_recs; j++) {
6011 nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
6012 ASSERT(xfs_bmbt_disk_get_startoff(ep) +
6013 xfs_bmbt_disk_get_blockcount(ep) <=
6014 xfs_bmbt_disk_get_startoff(nextp));
6015 ep = nextp;
6016 }
6017
 /* Remember the last extent for the cross-leaf check above. */
6018 last = *ep;
6019 i += num_recs;
6020 if (bp_release) {
6021 bp_release = 0;
6022 xfs_trans_brelse(NULL, bp);
6023 }
6024 bno = nextbno;
6025 /*
6026 * If we've reached the end, stop.
6027 */
6028 if (bno == NULLFSBLOCK)
6029 break;
6030
6031 bp_release = 0;
6032 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
6033 if (!bp) {
6034 bp_release = 1;
6035 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
6036 XFS_BMAP_BTREE_REF,
6037 &xfs_bmbt_buf_ops);
6038 if (error)
6039 goto error_norelse;
6040 }
6041 block = XFS_BUF_TO_BLOCK(bp);
6042 }
6043 if (bp_release) {
6044 bp_release = 0;
6045 xfs_trans_brelse(NULL, bp);
6046 }
6047 return;
6048
 /* error0: a buffer may still be held -- release it before bailing. */
6049error0:
6050 xfs_warn(mp, "%s: at error0", __func__);
6051 if (bp_release)
6052 xfs_trans_brelse(NULL, bp);
 /* error_norelse: read failed or buffer already released. */
6053error_norelse:
6054 xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
6055 __func__, i);
6056 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
6057 return;
6058}
6059#endif
6060
6061/*
6062 * Count fsblocks of the given fork.
 *
 * @tp:        transaction (may be used for btree buffer reads)
 * @ip:        incore inode whose fork is counted
 * @whichfork: XFS_DATA_FORK or XFS_ATTR_FORK
 * @count:     out parameter; block count is ADDED to *count, not reset
 *             (caller is expected to initialize it)
 *
 * Extents-format forks are summed directly from the incore extent list;
 * btree-format forks are walked via xfs_bmap_count_tree().  Returns 0 or
 * EFSCORRUPTED if the tree walk fails.
6063 */
6064int /* error */
6065xfs_bmap_count_blocks(
6066 xfs_trans_t *tp, /* transaction pointer */
6067 xfs_inode_t *ip, /* incore inode */
6068 int whichfork, /* data or attr fork */
6069 int *count) /* out: count of blocks */
6070{
6071 struct xfs_btree_block *block; /* current btree block */
6072 xfs_fsblock_t bno; /* block # of "block" */
6073 xfs_ifork_t *ifp; /* fork structure */
6074 int level; /* btree level, for checking */
6075 xfs_mount_t *mp; /* file system mount structure */
6076 __be64 *pp; /* pointer to block address */
6077
6078 bno = NULLFSBLOCK;
6079 mp = ip->i_mount;
6080 ifp = XFS_IFORK_PTR(ip, whichfork);
6081 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
6082 xfs_bmap_count_leaves(ifp, 0,
6083 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
6084 count);
6085 return 0;
6086 }
6087
6088 /*
6089 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
6090 */
6091 block = ifp->if_broot;
6092 level = be16_to_cpu(block->bb_level);
6093 ASSERT(level > 0);
6094 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
6095 bno = be64_to_cpu(*pp);
6096 ASSERT(bno != NULLDFSBNO);
6097 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
6098 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
6099
6100 if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
6101 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
6102 mp);
6103 return XFS_ERROR(EFSCORRUPTED);
6104 }
6105
6106 return 0;
6107}
6108
6109/*
6110 * Recursively walks each level of a btree
6111 * to count total fsblocks in use.
 *
 * Counts the btree blocks themselves (*count += 1 per block read) plus,
 * at the leaf level, the data blocks covered by the extent records
 * (via xfs_bmap_disk_count_leaves).  Descends only the leftmost child
 * at each level and sweeps each level left-to-right via rightsib links.
 * Returns 0, a read error, or EFSCORRUPTED from a failed recursion.
6112 */
6113STATIC int /* error */
6114xfs_bmap_count_tree(
6115 xfs_mount_t *mp, /* file system mount point */
6116 xfs_trans_t *tp, /* transaction pointer */
6117 xfs_ifork_t *ifp, /* inode fork pointer */
6118 xfs_fsblock_t blockno, /* file system block number */
6119 int levelin, /* level in btree */
6120 int *count) /* Count of blocks */
6121{
6122 int error;
6123 xfs_buf_t *bp, *nbp;
6124 int level = levelin;
6125 __be64 *pp;
6126 xfs_fsblock_t bno = blockno;
6127 xfs_fsblock_t nextbno;
6128 struct xfs_btree_block *block, *nextblock;
6129 int numrecs;
6130
6131 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
6132 &xfs_bmbt_buf_ops);
6133 if (error)
6134 return error;
6135 *count += 1;
6136 block = XFS_BUF_TO_BLOCK(bp);
6137
6138 if (--level) {
6139 /* Not at node above leaves, count this level of nodes */
6140 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6141 while (nextbno != NULLFSBLOCK) {
6142 error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
6143 XFS_BMAP_BTREE_REF,
6144 &xfs_bmbt_buf_ops);
6145 if (error)
6146 return error;
6147 *count += 1;
6148 nextblock = XFS_BUF_TO_BLOCK(nbp);
6149 nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
6150 xfs_trans_brelse(tp, nbp);
6151 }
6152
6153 /* Dive to the next level */
6154 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
6155 bno = be64_to_cpu(*pp);
6156 if (unlikely((error =
6157 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
6158 xfs_trans_brelse(tp, bp);
6159 XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
6160 XFS_ERRLEVEL_LOW, mp);
6161 return XFS_ERROR(EFSCORRUPTED);
6162 }
6163 xfs_trans_brelse(tp, bp);
6164 } else {
6165 /* count all level 1 nodes and their leaves */
6166 for (;;) {
6167 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6168 numrecs = be16_to_cpu(block->bb_numrecs);
6169 xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
6170 xfs_trans_brelse(tp, bp);
6171 if (nextbno == NULLFSBLOCK)
6172 break;
6173 bno = nextbno;
6174 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
6175 XFS_BMAP_BTREE_REF,
6176 &xfs_bmbt_buf_ops);
6177 if (error)
6178 return error;
6179 *count += 1;
6180 block = XFS_BUF_TO_BLOCK(bp);
6181 }
6182 }
6183 return 0;
6184}
6185
6186/*
6187 * Count leaf blocks given a range of extent records.
 *
 * Sums the blockcount of @numrecs incore extent records starting at
 * index @idx of fork @ifp into *count (additive; *count is not reset).
6188 */
6189STATIC void
6190xfs_bmap_count_leaves(
6191 xfs_ifork_t *ifp,
6192 xfs_extnum_t idx,
6193 int numrecs,
6194 int *count)
6195{
6196 int b;
6197
6198 for (b = 0; b < numrecs; b++) {
6199 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
6200 *count += xfs_bmbt_get_blockcount(frp);
6201 }
6202}
6203
6204/*
6205 * Count leaf blocks given a range of extent records originally
6206 * in btree format.
 *
 * Like xfs_bmap_count_leaves(), but reads on-disk (big-endian) extent
 * records out of btree block @block.  Note record indices in btree
 * blocks are 1-based, hence the 1..numrecs loop.  Additive into *count.
6207 */
6208STATIC void
6209xfs_bmap_disk_count_leaves(
6210 struct xfs_mount *mp,
6211 struct xfs_btree_block *block,
6212 int numrecs,
6213 int *count)
6214{
6215 int b;
6216 xfs_bmbt_rec_t *frp;
6217
6218 for (b = 1; b <= numrecs; b++) {
6219 frp = XFS_BMBT_REC_ADDR(mp, block, b);
6220 *count += xfs_bmbt_disk_get_blockcount(frp);
6221 }
6222}
6223
6224/* 6107/*
6225 * dead simple method of punching delalyed allocation blocks from a range in 6108 * dead simple method of punching delalyed allocation blocks from a range in
6226 * the inode. Walks a block at a time so will be slow, but is only executed in 6109 * the inode. Walks a block at a time so will be slow, but is only executed in
@@ -6295,16 +6178,3 @@ next_block:
6295 6178
6296 return error; 6179 return error;
6297} 6180}
6298
6299/*
6300 * Convert the given file system block to a disk block. We have to treat it
6301 * differently based on whether the file is a real time file or not, because the
6302 * bmap code does.
 *
 * Realtime inodes address the realtime device directly (FSB == raw block,
 * so only a basic-block conversion is needed); regular inodes go through
 * the AG-aware FSB-to-daddr translation.
 * NOTE(review): the trailing backslashes below are harmless leftovers
 * from when this body was a macro.
6303 */
6304xfs_daddr_t
6305xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
6306{
6307 return (XFS_IS_REALTIME_INODE(ip) ? \
6308 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
6309 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
6310}
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 061b45cbe614..3a86c3fa6de1 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,6 +37,7 @@
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_quota.h" 38#include "xfs_quota.h"
39#include "xfs_trace.h" 39#include "xfs_trace.h"
40#include "xfs_cksum.h"
40 41
41/* 42/*
42 * Determine the extent state. 43 * Determine the extent state.
@@ -59,24 +60,31 @@ xfs_extent_state(
59 */ 60 */
60void 61void
61xfs_bmdr_to_bmbt( 62xfs_bmdr_to_bmbt(
62 struct xfs_mount *mp, 63 struct xfs_inode *ip,
63 xfs_bmdr_block_t *dblock, 64 xfs_bmdr_block_t *dblock,
64 int dblocklen, 65 int dblocklen,
65 struct xfs_btree_block *rblock, 66 struct xfs_btree_block *rblock,
66 int rblocklen) 67 int rblocklen)
67{ 68{
69 struct xfs_mount *mp = ip->i_mount;
68 int dmxr; 70 int dmxr;
69 xfs_bmbt_key_t *fkp; 71 xfs_bmbt_key_t *fkp;
70 __be64 *fpp; 72 __be64 *fpp;
71 xfs_bmbt_key_t *tkp; 73 xfs_bmbt_key_t *tkp;
72 __be64 *tpp; 74 __be64 *tpp;
73 75
74 rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 76 if (xfs_sb_version_hascrc(&mp->m_sb))
77 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
78 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
79 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
80 else
81 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
82 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
83 XFS_BTREE_LONG_PTRS);
84
75 rblock->bb_level = dblock->bb_level; 85 rblock->bb_level = dblock->bb_level;
76 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 86 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
77 rblock->bb_numrecs = dblock->bb_numrecs; 87 rblock->bb_numrecs = dblock->bb_numrecs;
78 rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
79 rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
80 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); 88 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
81 fkp = XFS_BMDR_KEY_ADDR(dblock, 1); 89 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
82 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); 90 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
@@ -424,7 +432,13 @@ xfs_bmbt_to_bmdr(
424 xfs_bmbt_key_t *tkp; 432 xfs_bmbt_key_t *tkp;
425 __be64 *tpp; 433 __be64 *tpp;
426 434
427 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); 435 if (xfs_sb_version_hascrc(&mp->m_sb)) {
436 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
437 ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
438 ASSERT(rblock->bb_u.l.bb_blkno ==
439 cpu_to_be64(XFS_BUF_DADDR_NULL));
440 } else
441 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
428 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); 442 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
429 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); 443 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
430 ASSERT(rblock->bb_level != 0); 444 ASSERT(rblock->bb_level != 0);
@@ -708,59 +722,89 @@ xfs_bmbt_key_diff(
708 cur->bc_rec.b.br_startoff; 722 cur->bc_rec.b.br_startoff;
709} 723}
710 724
711static void 725static int
712xfs_bmbt_verify( 726xfs_bmbt_verify(
713 struct xfs_buf *bp) 727 struct xfs_buf *bp)
714{ 728{
715 struct xfs_mount *mp = bp->b_target->bt_mount; 729 struct xfs_mount *mp = bp->b_target->bt_mount;
716 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 730 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
717 unsigned int level; 731 unsigned int level;
718 int lblock_ok; /* block passes checks */
719 732
720 /* magic number and level verification. 733 switch (block->bb_magic) {
734 case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
735 if (!xfs_sb_version_hascrc(&mp->m_sb))
736 return false;
737 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
738 return false;
739 if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
740 return false;
741 /*
742 * XXX: need a better way of verifying the owner here. Right now
743 * just make sure there has been one set.
744 */
745 if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
746 return false;
747 /* fall through */
748 case cpu_to_be32(XFS_BMAP_MAGIC):
749 break;
750 default:
751 return false;
752 }
753
754 /*
755 * numrecs and level verification.
721 * 756 *
722 * We don't know waht fork we belong to, so just verify that the level 757 * We don't know what fork we belong to, so just verify that the level
723 * is less than the maximum of the two. Later checks will be more 758 * is less than the maximum of the two. Later checks will be more
724 * precise. 759 * precise.
725 */ 760 */
726 level = be16_to_cpu(block->bb_level); 761 level = be16_to_cpu(block->bb_level);
727 lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) && 762 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
728 level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]); 763 return false;
729 764 if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
730 /* numrecs verification */ 765 return false;
731 lblock_ok = lblock_ok &&
732 be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
733 766
734 /* sibling pointer verification */ 767 /* sibling pointer verification */
735 lblock_ok = lblock_ok && 768 if (!block->bb_u.l.bb_leftsib ||
736 block->bb_u.l.bb_leftsib && 769 (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
737 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || 770 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
738 XFS_FSB_SANITY_CHECK(mp, 771 return false;
739 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 772 if (!block->bb_u.l.bb_rightsib ||
740 block->bb_u.l.bb_rightsib && 773 (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
741 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || 774 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
742 XFS_FSB_SANITY_CHECK(mp, 775 return false;
743 be64_to_cpu(block->bb_u.l.bb_rightsib))); 776
744 777 return true;
745 if (!lblock_ok) { 778
746 trace_xfs_btree_corrupt(bp, _RET_IP_);
747 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
748 xfs_buf_ioerror(bp, EFSCORRUPTED);
749 }
750} 779}
751 780
752static void 781static void
753xfs_bmbt_read_verify( 782xfs_bmbt_read_verify(
754 struct xfs_buf *bp) 783 struct xfs_buf *bp)
755{ 784{
756 xfs_bmbt_verify(bp); 785 if (!(xfs_btree_lblock_verify_crc(bp) &&
786 xfs_bmbt_verify(bp))) {
787 trace_xfs_btree_corrupt(bp, _RET_IP_);
788 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
789 bp->b_target->bt_mount, bp->b_addr);
790 xfs_buf_ioerror(bp, EFSCORRUPTED);
791 }
792
757} 793}
758 794
759static void 795static void
760xfs_bmbt_write_verify( 796xfs_bmbt_write_verify(
761 struct xfs_buf *bp) 797 struct xfs_buf *bp)
762{ 798{
763 xfs_bmbt_verify(bp); 799 if (!xfs_bmbt_verify(bp)) {
800 xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
801 trace_xfs_btree_corrupt(bp, _RET_IP_);
802 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
803 bp->b_target->bt_mount, bp->b_addr);
804 xfs_buf_ioerror(bp, EFSCORRUPTED);
805 return;
806 }
807 xfs_btree_lblock_calc_crc(bp);
764} 808}
765 809
766const struct xfs_buf_ops xfs_bmbt_buf_ops = { 810const struct xfs_buf_ops xfs_bmbt_buf_ops = {
@@ -838,6 +882,8 @@ xfs_bmbt_init_cursor(
838 882
839 cur->bc_ops = &xfs_bmbt_ops; 883 cur->bc_ops = &xfs_bmbt_ops;
840 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; 884 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
885 if (xfs_sb_version_hascrc(&mp->m_sb))
886 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
841 887
842 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); 888 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
843 cur->bc_private.b.ip = ip; 889 cur->bc_private.b.ip = ip;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 88469ca08696..70c43d9f72c1 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -18,7 +18,8 @@
18#ifndef __XFS_BMAP_BTREE_H__ 18#ifndef __XFS_BMAP_BTREE_H__
19#define __XFS_BMAP_BTREE_H__ 19#define __XFS_BMAP_BTREE_H__
20 20
21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ 21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
22#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */
22 23
23struct xfs_btree_cur; 24struct xfs_btree_cur;
24struct xfs_btree_block; 25struct xfs_btree_block;
@@ -136,10 +137,10 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
136 137
137/* 138/*
138 * Btree block header size depends on a superblock flag. 139 * Btree block header size depends on a superblock flag.
139 *
140 * (not quite yet, but soon)
141 */ 140 */
142#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN 141#define XFS_BMBT_BLOCK_LEN(mp) \
142 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
143 XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
143 144
144#define XFS_BMBT_REC_ADDR(mp, block, index) \ 145#define XFS_BMBT_REC_ADDR(mp, block, index) \
145 ((xfs_bmbt_rec_t *) \ 146 ((xfs_bmbt_rec_t *) \
@@ -186,12 +187,12 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
186#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ 187#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
187 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) 188 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
188 189
189#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ 190#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
190 (int)(XFS_BTREE_LBLOCK_LEN + \ 191 (int)(XFS_BMBT_BLOCK_LEN(mp) + \
191 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 192 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
192 193
193#define XFS_BMAP_BROOT_SPACE(bb) \ 194#define XFS_BMAP_BROOT_SPACE(mp, bb) \
194 (XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) 195 (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
195#define XFS_BMDR_SPACE_CALC(nrecs) \ 196#define XFS_BMDR_SPACE_CALC(nrecs) \
196 (int)(sizeof(xfs_bmdr_block_t) + \ 197 (int)(sizeof(xfs_bmdr_block_t) + \
197 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 198 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
@@ -204,7 +205,7 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
204/* 205/*
205 * Prototypes for xfs_bmap.c to call. 206 * Prototypes for xfs_bmap.c to call.
206 */ 207 */
207extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, 208extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
208 struct xfs_btree_block *, int); 209 struct xfs_btree_block *, int);
209extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 210extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
210extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); 211extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index db010408d701..8804b8a3c310 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -30,9 +30,11 @@
30#include "xfs_dinode.h" 30#include "xfs_dinode.h"
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
33#include "xfs_buf_item.h"
33#include "xfs_btree.h" 34#include "xfs_btree.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_cksum.h"
36 38
37/* 39/*
38 * Cursor allocation zone. 40 * Cursor allocation zone.
@@ -42,9 +44,13 @@ kmem_zone_t *xfs_btree_cur_zone;
42/* 44/*
43 * Btree magic numbers. 45 * Btree magic numbers.
44 */ 46 */
45const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { 47static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 48 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
49 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
50 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
47}; 51};
52#define xfs_btree_magic(cur) \
53 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
48 54
49 55
50STATIC int /* error (0 or EFSCORRUPTED) */ 56STATIC int /* error (0 or EFSCORRUPTED) */
@@ -54,30 +60,38 @@ xfs_btree_check_lblock(
54 int level, /* level of the btree block */ 60 int level, /* level of the btree block */
55 struct xfs_buf *bp) /* buffer for block, if any */ 61 struct xfs_buf *bp) /* buffer for block, if any */
56{ 62{
57 int lblock_ok; /* block passes checks */ 63 int lblock_ok = 1; /* block passes checks */
58 struct xfs_mount *mp; /* file system mount point */ 64 struct xfs_mount *mp; /* file system mount point */
59 65
60 mp = cur->bc_mp; 66 mp = cur->bc_mp;
61 lblock_ok = 67
62 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 68 if (xfs_sb_version_hascrc(&mp->m_sb)) {
69 lblock_ok = lblock_ok &&
70 uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
71 block->bb_u.l.bb_blkno == cpu_to_be64(
72 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
73 }
74
75 lblock_ok = lblock_ok &&
76 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
63 be16_to_cpu(block->bb_level) == level && 77 be16_to_cpu(block->bb_level) == level &&
64 be16_to_cpu(block->bb_numrecs) <= 78 be16_to_cpu(block->bb_numrecs) <=
65 cur->bc_ops->get_maxrecs(cur, level) && 79 cur->bc_ops->get_maxrecs(cur, level) &&
66 block->bb_u.l.bb_leftsib && 80 block->bb_u.l.bb_leftsib &&
67 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || 81 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
68 XFS_FSB_SANITY_CHECK(mp, 82 XFS_FSB_SANITY_CHECK(mp,
69 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 83 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
70 block->bb_u.l.bb_rightsib && 84 block->bb_u.l.bb_rightsib &&
71 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || 85 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
72 XFS_FSB_SANITY_CHECK(mp, 86 XFS_FSB_SANITY_CHECK(mp,
73 be64_to_cpu(block->bb_u.l.bb_rightsib))); 87 be64_to_cpu(block->bb_u.l.bb_rightsib)));
88
74 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 89 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
75 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 90 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
76 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 91 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
77 if (bp) 92 if (bp)
78 trace_xfs_btree_corrupt(bp, _RET_IP_); 93 trace_xfs_btree_corrupt(bp, _RET_IP_);
79 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, 94 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
80 mp);
81 return XFS_ERROR(EFSCORRUPTED); 95 return XFS_ERROR(EFSCORRUPTED);
82 } 96 }
83 return 0; 97 return 0;
@@ -90,16 +104,26 @@ xfs_btree_check_sblock(
90 int level, /* level of the btree block */ 104 int level, /* level of the btree block */
91 struct xfs_buf *bp) /* buffer containing block */ 105 struct xfs_buf *bp) /* buffer containing block */
92{ 106{
107 struct xfs_mount *mp; /* file system mount point */
93 struct xfs_buf *agbp; /* buffer for ag. freespace struct */ 108 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
94 struct xfs_agf *agf; /* ag. freespace structure */ 109 struct xfs_agf *agf; /* ag. freespace structure */
95 xfs_agblock_t agflen; /* native ag. freespace length */ 110 xfs_agblock_t agflen; /* native ag. freespace length */
96 int sblock_ok; /* block passes checks */ 111 int sblock_ok = 1; /* block passes checks */
97 112
113 mp = cur->bc_mp;
98 agbp = cur->bc_private.a.agbp; 114 agbp = cur->bc_private.a.agbp;
99 agf = XFS_BUF_TO_AGF(agbp); 115 agf = XFS_BUF_TO_AGF(agbp);
100 agflen = be32_to_cpu(agf->agf_length); 116 agflen = be32_to_cpu(agf->agf_length);
101 sblock_ok = 117
102 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 118 if (xfs_sb_version_hascrc(&mp->m_sb)) {
119 sblock_ok = sblock_ok &&
120 uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
121 block->bb_u.s.bb_blkno == cpu_to_be64(
122 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
123 }
124
125 sblock_ok = sblock_ok &&
126 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
103 be16_to_cpu(block->bb_level) == level && 127 be16_to_cpu(block->bb_level) == level &&
104 be16_to_cpu(block->bb_numrecs) <= 128 be16_to_cpu(block->bb_numrecs) <=
105 cur->bc_ops->get_maxrecs(cur, level) && 129 cur->bc_ops->get_maxrecs(cur, level) &&
@@ -109,13 +133,13 @@ xfs_btree_check_sblock(
109 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 133 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
110 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 134 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
111 block->bb_u.s.bb_rightsib; 135 block->bb_u.s.bb_rightsib;
112 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 136
137 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
113 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 138 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
114 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 139 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
115 if (bp) 140 if (bp)
116 trace_xfs_btree_corrupt(bp, _RET_IP_); 141 trace_xfs_btree_corrupt(bp, _RET_IP_);
117 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", 142 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
118 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
119 return XFS_ERROR(EFSCORRUPTED); 143 return XFS_ERROR(EFSCORRUPTED);
120 } 144 }
121 return 0; 145 return 0;
@@ -194,6 +218,72 @@ xfs_btree_check_ptr(
194#endif 218#endif
195 219
196/* 220/*
221 * Calculate CRC on the whole btree block and stuff it into the
222 * long-form btree header.
223 *
224 * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
225 * it into the buffer so recovery knows what the last modification was that made
226 * it to disk.
 *
 * No-op on filesystems without the CRC (v5) superblock feature bit.
227 */
228void
229xfs_btree_lblock_calc_crc(
230 struct xfs_buf *bp)
231{
232 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
233 struct xfs_buf_log_item *bip = bp->b_fspriv;
234
235 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
236 return;
 /* bip may be NULL if the buffer was never logged; skip the LSN then. */
237 if (bip)
238 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
239 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
240 XFS_BTREE_LBLOCK_CRC_OFF);
241}
242
/*
 * Verify the CRC stored in a long-form btree block header against the
 * buffer contents.  Always returns true on filesystems without the CRC
 * feature bit, so callers can use it unconditionally.
 */
243bool
244xfs_btree_lblock_verify_crc(
245 struct xfs_buf *bp)
246{
247 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
248 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
249 XFS_BTREE_LBLOCK_CRC_OFF);
250 return true;
251}
252
253/*
254 * Calculate CRC on the whole btree block and stuff it into the
255 * short-form btree header.
256 *
257 * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
258 * it into the buffer so recovery knows what the last modification was that made
259 * it to disk.
 *
 * Short-form counterpart of xfs_btree_lblock_calc_crc(); no-op without
 * the CRC (v5) superblock feature bit.
260 */
261void
262xfs_btree_sblock_calc_crc(
263 struct xfs_buf *bp)
264{
265 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
266 struct xfs_buf_log_item *bip = bp->b_fspriv;
267
268 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
269 return;
 /* bip may be NULL if the buffer was never logged; skip the LSN then. */
270 if (bip)
271 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
272 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
273 XFS_BTREE_SBLOCK_CRC_OFF);
274}
275
/*
 * Verify the CRC stored in a short-form btree block header against the
 * buffer contents.  Always returns true on filesystems without the CRC
 * feature bit, so callers can use it unconditionally.
 */
276bool
277xfs_btree_sblock_verify_crc(
278 struct xfs_buf *bp)
279{
280 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
281 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
282 XFS_BTREE_SBLOCK_CRC_OFF);
283 return true;
284}
285
286/*
197 * Delete the btree cursor. 287 * Delete the btree cursor.
198 */ 288 */
199void 289void
@@ -277,10 +367,8 @@ xfs_btree_dup_cursor(
277 *ncur = NULL; 367 *ncur = NULL;
278 return error; 368 return error;
279 } 369 }
280 new->bc_bufs[i] = bp; 370 }
281 ASSERT(!xfs_buf_geterror(bp)); 371 new->bc_bufs[i] = bp;
282 } else
283 new->bc_bufs[i] = NULL;
284 } 372 }
285 *ncur = new; 373 *ncur = new;
286 return 0; 374 return 0;
@@ -321,9 +409,14 @@ xfs_btree_dup_cursor(
321 */ 409 */
322static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 410static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
323{ 411{
324 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 412 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
325 XFS_BTREE_LBLOCK_LEN : 413 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
326 XFS_BTREE_SBLOCK_LEN; 414 return XFS_BTREE_LBLOCK_CRC_LEN;
415 return XFS_BTREE_LBLOCK_LEN;
416 }
417 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
418 return XFS_BTREE_SBLOCK_CRC_LEN;
419 return XFS_BTREE_SBLOCK_LEN;
327} 420}
328 421
329/* 422/*
@@ -863,43 +956,85 @@ xfs_btree_set_sibling(
863} 956}
864 957
865void 958void
959xfs_btree_init_block_int(
960 struct xfs_mount *mp,
961 struct xfs_btree_block *buf,
962 xfs_daddr_t blkno,
963 __u32 magic,
964 __u16 level,
965 __u16 numrecs,
966 __u64 owner,
967 unsigned int flags)
968{
969 buf->bb_magic = cpu_to_be32(magic);
970 buf->bb_level = cpu_to_be16(level);
971 buf->bb_numrecs = cpu_to_be16(numrecs);
972
973 if (flags & XFS_BTREE_LONG_PTRS) {
974 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
975 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
976 if (flags & XFS_BTREE_CRC_BLOCKS) {
977 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
978 buf->bb_u.l.bb_owner = cpu_to_be64(owner);
979 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
980 buf->bb_u.l.bb_pad = 0;
981 }
982 } else {
983 /* owner is a 32 bit value on short blocks */
984 __u32 __owner = (__u32)owner;
985
986 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
987 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
988 if (flags & XFS_BTREE_CRC_BLOCKS) {
989 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
990 buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
991 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
992 }
993 }
994}
995
996void
866xfs_btree_init_block( 997xfs_btree_init_block(
867 struct xfs_mount *mp, 998 struct xfs_mount *mp,
868 struct xfs_buf *bp, 999 struct xfs_buf *bp,
869 __u32 magic, 1000 __u32 magic,
870 __u16 level, 1001 __u16 level,
871 __u16 numrecs, 1002 __u16 numrecs,
1003 __u64 owner,
872 unsigned int flags) 1004 unsigned int flags)
873{ 1005{
874 struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp); 1006 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
875 1007 magic, level, numrecs, owner, flags);
876 new->bb_magic = cpu_to_be32(magic);
877 new->bb_level = cpu_to_be16(level);
878 new->bb_numrecs = cpu_to_be16(numrecs);
879
880 if (flags & XFS_BTREE_LONG_PTRS) {
881 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
882 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
883 } else {
884 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
885 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
886 }
887} 1008}
888 1009
889STATIC void 1010STATIC void
890xfs_btree_init_block_cur( 1011xfs_btree_init_block_cur(
891 struct xfs_btree_cur *cur, 1012 struct xfs_btree_cur *cur,
1013 struct xfs_buf *bp,
892 int level, 1014 int level,
893 int numrecs, 1015 int numrecs)
894 struct xfs_buf *bp)
895{ 1016{
896 xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum], 1017 __u64 owner;
897 level, numrecs, cur->bc_flags); 1018
1019 /*
1020 * we can pull the owner from the cursor right now as the different
1021 * owners align directly with the pointer size of the btree. This may
1022 * change in future, but is safe for current users of the generic btree
1023 * code.
1024 */
1025 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1026 owner = cur->bc_private.b.ip->i_ino;
1027 else
1028 owner = cur->bc_private.a.agno;
1029
1030 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1031 xfs_btree_magic(cur), level, numrecs,
1032 owner, cur->bc_flags);
898} 1033}
899 1034
900/* 1035/*
901 * Return true if ptr is the last record in the btree and 1036 * Return true if ptr is the last record in the btree and
902 * we need to track updateѕ to this record. The decision 1037 * we need to track updates to this record. The decision
903 * will be further refined in the update_lastrec method. 1038 * will be further refined in the update_lastrec method.
904 */ 1039 */
905STATIC int 1040STATIC int
@@ -1147,6 +1282,7 @@ xfs_btree_log_keys(
1147 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1282 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1148 1283
1149 if (bp) { 1284 if (bp) {
1285 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1150 xfs_trans_log_buf(cur->bc_tp, bp, 1286 xfs_trans_log_buf(cur->bc_tp, bp,
1151 xfs_btree_key_offset(cur, first), 1287 xfs_btree_key_offset(cur, first),
1152 xfs_btree_key_offset(cur, last + 1) - 1); 1288 xfs_btree_key_offset(cur, last + 1) - 1);
@@ -1171,6 +1307,7 @@ xfs_btree_log_recs(
1171 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1307 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1172 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1308 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1173 1309
1310 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1174 xfs_trans_log_buf(cur->bc_tp, bp, 1311 xfs_trans_log_buf(cur->bc_tp, bp,
1175 xfs_btree_rec_offset(cur, first), 1312 xfs_btree_rec_offset(cur, first),
1176 xfs_btree_rec_offset(cur, last + 1) - 1); 1313 xfs_btree_rec_offset(cur, last + 1) - 1);
@@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(
1195 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1332 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1196 int level = xfs_btree_get_level(block); 1333 int level = xfs_btree_get_level(block);
1197 1334
1335 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1198 xfs_trans_log_buf(cur->bc_tp, bp, 1336 xfs_trans_log_buf(cur->bc_tp, bp,
1199 xfs_btree_ptr_offset(cur, first, level), 1337 xfs_btree_ptr_offset(cur, first, level),
1200 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1338 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
@@ -1223,7 +1361,12 @@ xfs_btree_log_block(
1223 offsetof(struct xfs_btree_block, bb_numrecs), 1361 offsetof(struct xfs_btree_block, bb_numrecs),
1224 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1362 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1225 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1363 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1226 XFS_BTREE_SBLOCK_LEN 1364 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1365 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1366 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1367 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1368 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1369 XFS_BTREE_SBLOCK_CRC_LEN
1227 }; 1370 };
1228 static const short loffsets[] = { /* table of offsets (long) */ 1371 static const short loffsets[] = { /* table of offsets (long) */
1229 offsetof(struct xfs_btree_block, bb_magic), 1372 offsetof(struct xfs_btree_block, bb_magic),
@@ -1231,17 +1374,40 @@ xfs_btree_log_block(
1231 offsetof(struct xfs_btree_block, bb_numrecs), 1374 offsetof(struct xfs_btree_block, bb_numrecs),
1232 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1375 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1233 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1376 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1234 XFS_BTREE_LBLOCK_LEN 1377 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1378 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1379 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1380 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1381 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1382 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1383 XFS_BTREE_LBLOCK_CRC_LEN
1235 }; 1384 };
1236 1385
1237 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1386 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1238 XFS_BTREE_TRACE_ARGBI(cur, bp, fields); 1387 XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1239 1388
1240 if (bp) { 1389 if (bp) {
1390 int nbits;
1391
1392 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1393 /*
1394 * We don't log the CRC when updating a btree
1395 * block but instead recreate it during log
1396 * recovery. As the log buffers have checksums
1397 * of their own this is safe and avoids logging a crc
1398 * update in a lot of places.
1399 */
1400 if (fields == XFS_BB_ALL_BITS)
1401 fields = XFS_BB_ALL_BITS_CRC;
1402 nbits = XFS_BB_NUM_BITS_CRC;
1403 } else {
1404 nbits = XFS_BB_NUM_BITS;
1405 }
1241 xfs_btree_offsets(fields, 1406 xfs_btree_offsets(fields,
1242 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1407 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1243 loffsets : soffsets, 1408 loffsets : soffsets,
1244 XFS_BB_NUM_BITS, &first, &last); 1409 nbits, &first, &last);
1410 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1245 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1411 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1246 } else { 1412 } else {
1247 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1413 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
@@ -2204,7 +2370,7 @@ xfs_btree_split(
2204 goto error0; 2370 goto error0;
2205 2371
2206 /* Fill in the btree header for the new right block. */ 2372 /* Fill in the btree header for the new right block. */
2207 xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp); 2373 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2208 2374
2209 /* 2375 /*
2210 * Split the entries between the old and the new block evenly. 2376 * Split the entries between the old and the new block evenly.
@@ -2513,7 +2679,7 @@ xfs_btree_new_root(
2513 nptr = 2; 2679 nptr = 2;
2514 } 2680 }
2515 /* Fill in the new block's btree header and log it. */ 2681 /* Fill in the new block's btree header and log it. */
2516 xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp); 2682 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2517 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 2683 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2518 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 2684 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2519 !xfs_btree_ptr_is_null(cur, &rptr)); 2685 !xfs_btree_ptr_is_null(cur, &rptr));
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index f932897194eb..6e6c915673fe 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -42,11 +42,15 @@ extern kmem_zone_t *xfs_btree_cur_zone;
42 * Generic btree header. 42 * Generic btree header.
43 * 43 *
44 * This is a combination of the actual format used on disk for short and long 44 * This is a combination of the actual format used on disk for short and long
45 * format btrees. The first three fields are shared by both format, but 45 * format btrees. The first three fields are shared by both format, but the
46 * the pointers are different and should be used with care. 46 * pointers are different and should be used with care.
47 * 47 *
48 * To get the size of the actual short or long form headers please use 48 * To get the size of the actual short or long form headers please use the size
49 * the size macros below. Never use sizeof(xfs_btree_block). 49 * macros below. Never use sizeof(xfs_btree_block).
50 *
51 * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
52 * with the crc feature bit, and all accesses to them must be conditional on
53 * that flag.
50 */ 54 */
51struct xfs_btree_block { 55struct xfs_btree_block {
52 __be32 bb_magic; /* magic number for block type */ 56 __be32 bb_magic; /* magic number for block type */
@@ -56,10 +60,23 @@ struct xfs_btree_block {
56 struct { 60 struct {
57 __be32 bb_leftsib; 61 __be32 bb_leftsib;
58 __be32 bb_rightsib; 62 __be32 bb_rightsib;
63
64 __be64 bb_blkno;
65 __be64 bb_lsn;
66 uuid_t bb_uuid;
67 __be32 bb_owner;
68 __le32 bb_crc;
59 } s; /* short form pointers */ 69 } s; /* short form pointers */
60 struct { 70 struct {
61 __be64 bb_leftsib; 71 __be64 bb_leftsib;
62 __be64 bb_rightsib; 72 __be64 bb_rightsib;
73
74 __be64 bb_blkno;
75 __be64 bb_lsn;
76 uuid_t bb_uuid;
77 __be64 bb_owner;
78 __le32 bb_crc;
79 __be32 bb_pad; /* padding for alignment */
63 } l; /* long form pointers */ 80 } l; /* long form pointers */
64 } bb_u; /* rest */ 81 } bb_u; /* rest */
65}; 82};
@@ -67,6 +84,16 @@ struct xfs_btree_block {
67#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ 84#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
68#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ 85#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
69 86
87/* sizes of CRC enabled btree blocks */
88#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40)
89#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48)
90
91
92#define XFS_BTREE_SBLOCK_CRC_OFF \
93 offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
94#define XFS_BTREE_LBLOCK_CRC_OFF \
95 offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
96
70 97
71/* 98/*
72 * Generic key, ptr and record wrapper structures. 99 * Generic key, ptr and record wrapper structures.
@@ -101,13 +128,11 @@ union xfs_btree_rec {
101#define XFS_BB_NUMRECS 0x04 128#define XFS_BB_NUMRECS 0x04
102#define XFS_BB_LEFTSIB 0x08 129#define XFS_BB_LEFTSIB 0x08
103#define XFS_BB_RIGHTSIB 0x10 130#define XFS_BB_RIGHTSIB 0x10
131#define XFS_BB_BLKNO 0x20
104#define XFS_BB_NUM_BITS 5 132#define XFS_BB_NUM_BITS 5
105#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) 133#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
106 134#define XFS_BB_NUM_BITS_CRC 8
107/* 135#define XFS_BB_ALL_BITS_CRC ((1 << XFS_BB_NUM_BITS_CRC) - 1)
108 * Magic numbers for btree blocks.
109 */
110extern const __uint32_t xfs_magics[];
111 136
112/* 137/*
113 * Generic stats interface 138 * Generic stats interface
@@ -256,6 +281,7 @@ typedef struct xfs_btree_cur
256#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ 281#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
257#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ 282#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
258#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ 283#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
284#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
259 285
260 286
261#define XFS_BTREE_NOERROR 0 287#define XFS_BTREE_NOERROR 0
@@ -393,8 +419,20 @@ xfs_btree_init_block(
393 __u32 magic, 419 __u32 magic,
394 __u16 level, 420 __u16 level,
395 __u16 numrecs, 421 __u16 numrecs,
422 __u64 owner,
396 unsigned int flags); 423 unsigned int flags);
397 424
425void
426xfs_btree_init_block_int(
427 struct xfs_mount *mp,
428 struct xfs_btree_block *buf,
429 xfs_daddr_t blkno,
430 __u32 magic,
431 __u16 level,
432 __u16 numrecs,
433 __u64 owner,
434 unsigned int flags);
435
398/* 436/*
399 * Common btree core entry points. 437 * Common btree core entry points.
400 */ 438 */
@@ -408,6 +446,14 @@ int xfs_btree_delete(struct xfs_btree_cur *, int *);
408int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); 446int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
409 447
410/* 448/*
449 * btree block CRC helpers
450 */
451void xfs_btree_lblock_calc_crc(struct xfs_buf *);
452bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
453void xfs_btree_sblock_calc_crc(struct xfs_buf *);
454bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
455
456/*
411 * Internal btree helpers also used by xfs_bmap.c. 457 * Internal btree helpers also used by xfs_bmap.c.
412 */ 458 */
413void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); 459void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8459b5d8cb71..82b70bda9f47 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1022,7 +1022,9 @@ xfs_buf_iodone_work(
1022 bool read = !!(bp->b_flags & XBF_READ); 1022 bool read = !!(bp->b_flags & XBF_READ);
1023 1023
1024 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); 1024 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1025 if (read && bp->b_ops) 1025
1026 /* only validate buffers that were read without errors */
1027 if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
1026 bp->b_ops->verify_read(bp); 1028 bp->b_ops->verify_read(bp);
1027 1029
1028 if (bp->b_iodone) 1030 if (bp->b_iodone)
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index ee36c88ecfde..2573d2a75fc8 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -24,19 +24,20 @@ extern kmem_zone_t *xfs_buf_item_zone;
24 * This flag indicates that the buffer contains on disk inodes 24 * This flag indicates that the buffer contains on disk inodes
25 * and requires special recovery handling. 25 * and requires special recovery handling.
26 */ 26 */
27#define XFS_BLF_INODE_BUF 0x1 27#define XFS_BLF_INODE_BUF (1<<0)
28/* 28/*
29 * This flag indicates that the buffer should not be replayed 29 * This flag indicates that the buffer should not be replayed
30 * during recovery because its blocks are being freed. 30 * during recovery because its blocks are being freed.
31 */ 31 */
32#define XFS_BLF_CANCEL 0x2 32#define XFS_BLF_CANCEL (1<<1)
33
33/* 34/*
34 * This flag indicates that the buffer contains on disk 35 * This flag indicates that the buffer contains on disk
35 * user or group dquots and may require special recovery handling. 36 * user or group dquots and may require special recovery handling.
36 */ 37 */
37#define XFS_BLF_UDQUOT_BUF 0x4 38#define XFS_BLF_UDQUOT_BUF (1<<2)
38#define XFS_BLF_PDQUOT_BUF 0x8 39#define XFS_BLF_PDQUOT_BUF (1<<3)
39#define XFS_BLF_GDQUOT_BUF 0x10 40#define XFS_BLF_GDQUOT_BUF (1<<4)
40 41
41#define XFS_BLF_CHUNK 128 42#define XFS_BLF_CHUNK 128
42#define XFS_BLF_SHIFT 7 43#define XFS_BLF_SHIFT 7
@@ -61,6 +62,55 @@ typedef struct xfs_buf_log_format {
61} xfs_buf_log_format_t; 62} xfs_buf_log_format_t;
62 63
63/* 64/*
65 * All buffers now need to tell recovery where the magic number
66 * is so that it can verify and calculate the CRCs on the buffer correctly
67 * once the changes have been replayed into the buffer.
68 *
69 * The type value is held in the upper 5 bits of the blf_flags field, which is
70 * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
71 */
72#define XFS_BLFT_BITS 5
73#define XFS_BLFT_SHIFT 11
74#define XFS_BLFT_MASK (((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
75
76enum xfs_blft {
77 XFS_BLFT_UNKNOWN_BUF = 0,
78 XFS_BLFT_UDQUOT_BUF,
79 XFS_BLFT_PDQUOT_BUF,
80 XFS_BLFT_GDQUOT_BUF,
81 XFS_BLFT_BTREE_BUF,
82 XFS_BLFT_AGF_BUF,
83 XFS_BLFT_AGFL_BUF,
84 XFS_BLFT_AGI_BUF,
85 XFS_BLFT_DINO_BUF,
86 XFS_BLFT_SYMLINK_BUF,
87 XFS_BLFT_DIR_BLOCK_BUF,
88 XFS_BLFT_DIR_DATA_BUF,
89 XFS_BLFT_DIR_FREE_BUF,
90 XFS_BLFT_DIR_LEAF1_BUF,
91 XFS_BLFT_DIR_LEAFN_BUF,
92 XFS_BLFT_DA_NODE_BUF,
93 XFS_BLFT_ATTR_LEAF_BUF,
94 XFS_BLFT_ATTR_RMT_BUF,
95 XFS_BLFT_SB_BUF,
96 XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
97};
98
99static inline void
100xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
101{
102 ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
103 blf->blf_flags &= ~XFS_BLFT_MASK;
104 blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
105}
106
107static inline __uint16_t
108xfs_blft_from_flags(struct xfs_buf_log_format *blf)
109{
110 return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
111}
112
113/*
64 * buf log item flags 114 * buf log item flags
65 */ 115 */
66#define XFS_BLI_HOLD 0x01 116#define XFS_BLI_HOLD 0x01
@@ -113,6 +163,10 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
113void xfs_buf_iodone_callbacks(struct xfs_buf *); 163void xfs_buf_iodone_callbacks(struct xfs_buf *);
114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 164void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
115 165
166void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
167 enum xfs_blft);
168void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp);
169
116#endif /* __KERNEL__ */ 170#endif /* __KERNEL__ */
117 171
118#endif /* __XFS_BUF_ITEM_H__ */ 172#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 4d7696a02418..9b26a99ebfe9 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -38,6 +39,8 @@
38#include "xfs_attr_leaf.h" 39#include "xfs_attr_leaf.h"
39#include "xfs_error.h" 40#include "xfs_error.h"
40#include "xfs_trace.h" 41#include "xfs_trace.h"
42#include "xfs_cksum.h"
43#include "xfs_buf_item.h"
41 44
42/* 45/*
43 * xfs_da_btree.c 46 * xfs_da_btree.c
@@ -52,69 +55,195 @@
52/* 55/*
53 * Routines used for growing the Btree. 56 * Routines used for growing the Btree.
54 */ 57 */
55STATIC int xfs_da_root_split(xfs_da_state_t *state, 58STATIC int xfs_da3_root_split(xfs_da_state_t *state,
56 xfs_da_state_blk_t *existing_root, 59 xfs_da_state_blk_t *existing_root,
57 xfs_da_state_blk_t *new_child); 60 xfs_da_state_blk_t *new_child);
58STATIC int xfs_da_node_split(xfs_da_state_t *state, 61STATIC int xfs_da3_node_split(xfs_da_state_t *state,
59 xfs_da_state_blk_t *existing_blk, 62 xfs_da_state_blk_t *existing_blk,
60 xfs_da_state_blk_t *split_blk, 63 xfs_da_state_blk_t *split_blk,
61 xfs_da_state_blk_t *blk_to_add, 64 xfs_da_state_blk_t *blk_to_add,
62 int treelevel, 65 int treelevel,
63 int *result); 66 int *result);
64STATIC void xfs_da_node_rebalance(xfs_da_state_t *state, 67STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
65 xfs_da_state_blk_t *node_blk_1, 68 xfs_da_state_blk_t *node_blk_1,
66 xfs_da_state_blk_t *node_blk_2); 69 xfs_da_state_blk_t *node_blk_2);
67STATIC void xfs_da_node_add(xfs_da_state_t *state, 70STATIC void xfs_da3_node_add(xfs_da_state_t *state,
68 xfs_da_state_blk_t *old_node_blk, 71 xfs_da_state_blk_t *old_node_blk,
69 xfs_da_state_blk_t *new_node_blk); 72 xfs_da_state_blk_t *new_node_blk);
70 73
71/* 74/*
72 * Routines used for shrinking the Btree. 75 * Routines used for shrinking the Btree.
73 */ 76 */
74STATIC int xfs_da_root_join(xfs_da_state_t *state, 77STATIC int xfs_da3_root_join(xfs_da_state_t *state,
75 xfs_da_state_blk_t *root_blk); 78 xfs_da_state_blk_t *root_blk);
76STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval); 79STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
77STATIC void xfs_da_node_remove(xfs_da_state_t *state, 80STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
78 xfs_da_state_blk_t *drop_blk); 81 xfs_da_state_blk_t *drop_blk);
79STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, 82STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
80 xfs_da_state_blk_t *src_node_blk, 83 xfs_da_state_blk_t *src_node_blk,
81 xfs_da_state_blk_t *dst_node_blk); 84 xfs_da_state_blk_t *dst_node_blk);
82 85
83/* 86/*
84 * Utility routines. 87 * Utility routines.
85 */ 88 */
86STATIC uint xfs_da_node_lasthash(struct xfs_buf *bp, int *count); 89STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
87STATIC int xfs_da_node_order(struct xfs_buf *node1_bp,
88 struct xfs_buf *node2_bp);
89STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
90 xfs_da_state_blk_t *drop_blk, 90 xfs_da_state_blk_t *drop_blk,
91 xfs_da_state_blk_t *save_blk); 91 xfs_da_state_blk_t *save_blk);
92STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state);
93 92
94static void 93
95xfs_da_node_verify( 94kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
95
96/*
97 * Allocate a dir-state structure.
98 * We don't put them on the stack since they're large.
99 */
100xfs_da_state_t *
101xfs_da_state_alloc(void)
102{
103 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
104}
105
106/*
107 * Kill the altpath contents of a da-state structure.
108 */
109STATIC void
110xfs_da_state_kill_altpath(xfs_da_state_t *state)
111{
112 int i;
113
114 for (i = 0; i < state->altpath.active; i++)
115 state->altpath.blk[i].bp = NULL;
116 state->altpath.active = 0;
117}
118
119/*
120 * Free a da-state structure.
121 */
122void
123xfs_da_state_free(xfs_da_state_t *state)
124{
125 xfs_da_state_kill_altpath(state);
126#ifdef DEBUG
127 memset((char *)state, 0, sizeof(*state));
128#endif /* DEBUG */
129 kmem_zone_free(xfs_da_state_zone, state);
130}
131
132void
133xfs_da3_node_hdr_from_disk(
134 struct xfs_da3_icnode_hdr *to,
135 struct xfs_da_intnode *from)
136{
137 ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
138 from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
139
140 if (from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
141 struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
142
143 to->forw = be32_to_cpu(hdr3->info.hdr.forw);
144 to->back = be32_to_cpu(hdr3->info.hdr.back);
145 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
146 to->count = be16_to_cpu(hdr3->__count);
147 to->level = be16_to_cpu(hdr3->__level);
148 return;
149 }
150 to->forw = be32_to_cpu(from->hdr.info.forw);
151 to->back = be32_to_cpu(from->hdr.info.back);
152 to->magic = be16_to_cpu(from->hdr.info.magic);
153 to->count = be16_to_cpu(from->hdr.__count);
154 to->level = be16_to_cpu(from->hdr.__level);
155}
156
157void
158xfs_da3_node_hdr_to_disk(
159 struct xfs_da_intnode *to,
160 struct xfs_da3_icnode_hdr *from)
161{
162 ASSERT(from->magic == XFS_DA_NODE_MAGIC ||
163 from->magic == XFS_DA3_NODE_MAGIC);
164
165 if (from->magic == XFS_DA3_NODE_MAGIC) {
166 struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
167
168 hdr3->info.hdr.forw = cpu_to_be32(from->forw);
169 hdr3->info.hdr.back = cpu_to_be32(from->back);
170 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
171 hdr3->__count = cpu_to_be16(from->count);
172 hdr3->__level = cpu_to_be16(from->level);
173 return;
174 }
175 to->hdr.info.forw = cpu_to_be32(from->forw);
176 to->hdr.info.back = cpu_to_be32(from->back);
177 to->hdr.info.magic = cpu_to_be16(from->magic);
178 to->hdr.__count = cpu_to_be16(from->count);
179 to->hdr.__level = cpu_to_be16(from->level);
180}
181
182static bool
183xfs_da3_node_verify(
96 struct xfs_buf *bp) 184 struct xfs_buf *bp)
97{ 185{
98 struct xfs_mount *mp = bp->b_target->bt_mount; 186 struct xfs_mount *mp = bp->b_target->bt_mount;
99 struct xfs_da_node_hdr *hdr = bp->b_addr; 187 struct xfs_da_intnode *hdr = bp->b_addr;
100 int block_ok = 0; 188 struct xfs_da3_icnode_hdr ichdr;
101 189
102 block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC); 190 xfs_da3_node_hdr_from_disk(&ichdr, hdr);
103 block_ok = block_ok && 191
104 be16_to_cpu(hdr->level) > 0 && 192 if (xfs_sb_version_hascrc(&mp->m_sb)) {
105 be16_to_cpu(hdr->count) > 0 ; 193 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
106 if (!block_ok) { 194
107 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 195 if (ichdr.magic != XFS_DA3_NODE_MAGIC)
108 xfs_buf_ioerror(bp, EFSCORRUPTED); 196 return false;
197
198 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
199 return false;
200 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
201 return false;
202 } else {
203 if (ichdr.magic != XFS_DA_NODE_MAGIC)
204 return false;
109 } 205 }
206 if (ichdr.level == 0)
207 return false;
208 if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
209 return false;
210 if (ichdr.count == 0)
211 return false;
110 212
213 /*
 214 * we don't know if the node is for an attribute or directory tree,
215 * so only fail if the count is outside both bounds
216 */
217 if (ichdr.count > mp->m_dir_node_ents &&
218 ichdr.count > mp->m_attr_node_ents)
219 return false;
220
221 /* XXX: hash order check? */
222
223 return true;
111} 224}
112 225
113static void 226static void
114xfs_da_node_write_verify( 227xfs_da3_node_write_verify(
115 struct xfs_buf *bp) 228 struct xfs_buf *bp)
116{ 229{
117 xfs_da_node_verify(bp); 230 struct xfs_mount *mp = bp->b_target->bt_mount;
231 struct xfs_buf_log_item *bip = bp->b_fspriv;
232 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
233
234 if (!xfs_da3_node_verify(bp)) {
235 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
236 xfs_buf_ioerror(bp, EFSCORRUPTED);
237 return;
238 }
239
240 if (!xfs_sb_version_hascrc(&mp->m_sb))
241 return;
242
243 if (bip)
244 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
245
246 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF);
118} 247}
119 248
120/* 249/*
@@ -124,40 +253,47 @@ xfs_da_node_write_verify(
124 * format of the block being read. 253 * format of the block being read.
125 */ 254 */
126static void 255static void
127xfs_da_node_read_verify( 256xfs_da3_node_read_verify(
128 struct xfs_buf *bp) 257 struct xfs_buf *bp)
129{ 258{
130 struct xfs_mount *mp = bp->b_target->bt_mount; 259 struct xfs_mount *mp = bp->b_target->bt_mount;
131 struct xfs_da_blkinfo *info = bp->b_addr; 260 struct xfs_da_blkinfo *info = bp->b_addr;
132 261
133 switch (be16_to_cpu(info->magic)) { 262 switch (be16_to_cpu(info->magic)) {
263 case XFS_DA3_NODE_MAGIC:
264 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
265 XFS_DA3_NODE_CRC_OFF))
266 break;
267 /* fall through */
134 case XFS_DA_NODE_MAGIC: 268 case XFS_DA_NODE_MAGIC:
135 xfs_da_node_verify(bp); 269 if (!xfs_da3_node_verify(bp))
136 break; 270 break;
271 return;
137 case XFS_ATTR_LEAF_MAGIC: 272 case XFS_ATTR_LEAF_MAGIC:
138 bp->b_ops = &xfs_attr_leaf_buf_ops; 273 bp->b_ops = &xfs_attr3_leaf_buf_ops;
139 bp->b_ops->verify_read(bp); 274 bp->b_ops->verify_read(bp);
140 return; 275 return;
141 case XFS_DIR2_LEAFN_MAGIC: 276 case XFS_DIR2_LEAFN_MAGIC:
142 bp->b_ops = &xfs_dir2_leafn_buf_ops; 277 case XFS_DIR3_LEAFN_MAGIC:
278 bp->b_ops = &xfs_dir3_leafn_buf_ops;
143 bp->b_ops->verify_read(bp); 279 bp->b_ops->verify_read(bp);
144 return; 280 return;
145 default: 281 default:
146 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
147 mp, info);
148 xfs_buf_ioerror(bp, EFSCORRUPTED);
149 break; 282 break;
150 } 283 }
284
285 /* corrupt block */
286 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
287 xfs_buf_ioerror(bp, EFSCORRUPTED);
151} 288}
152 289
153const struct xfs_buf_ops xfs_da_node_buf_ops = { 290const struct xfs_buf_ops xfs_da3_node_buf_ops = {
154 .verify_read = xfs_da_node_read_verify, 291 .verify_read = xfs_da3_node_read_verify,
155 .verify_write = xfs_da_node_write_verify, 292 .verify_write = xfs_da3_node_write_verify,
156}; 293};
157 294
158
159int 295int
160xfs_da_node_read( 296xfs_da3_node_read(
161 struct xfs_trans *tp, 297 struct xfs_trans *tp,
162 struct xfs_inode *dp, 298 struct xfs_inode *dp,
163 xfs_dablk_t bno, 299 xfs_dablk_t bno,
@@ -165,8 +301,35 @@ xfs_da_node_read(
165 struct xfs_buf **bpp, 301 struct xfs_buf **bpp,
166 int which_fork) 302 int which_fork)
167{ 303{
168 return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, 304 int err;
169 which_fork, &xfs_da_node_buf_ops); 305
306 err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
307 which_fork, &xfs_da3_node_buf_ops);
308 if (!err && tp) {
309 struct xfs_da_blkinfo *info = (*bpp)->b_addr;
310 int type;
311
312 switch (be16_to_cpu(info->magic)) {
313 case XFS_DA_NODE_MAGIC:
314 case XFS_DA3_NODE_MAGIC:
315 type = XFS_BLFT_DA_NODE_BUF;
316 break;
317 case XFS_ATTR_LEAF_MAGIC:
318 case XFS_ATTR3_LEAF_MAGIC:
319 type = XFS_BLFT_ATTR_LEAF_BUF;
320 break;
321 case XFS_DIR2_LEAFN_MAGIC:
322 case XFS_DIR3_LEAFN_MAGIC:
323 type = XFS_BLFT_DIR_LEAFN_BUF;
324 break;
325 default:
326 type = 0;
327 ASSERT(0);
328 break;
329 }
330 xfs_trans_buf_set_type(tp, *bpp, type);
331 }
332 return err;
170} 333}
171 334
172/*======================================================================== 335/*========================================================================
@@ -177,33 +340,46 @@ xfs_da_node_read(
177 * Create the initial contents of an intermediate node. 340 * Create the initial contents of an intermediate node.
178 */ 341 */
179int 342int
180xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, 343xfs_da3_node_create(
181 struct xfs_buf **bpp, int whichfork) 344 struct xfs_da_args *args,
345 xfs_dablk_t blkno,
346 int level,
347 struct xfs_buf **bpp,
348 int whichfork)
182{ 349{
183 xfs_da_intnode_t *node; 350 struct xfs_da_intnode *node;
184 struct xfs_buf *bp; 351 struct xfs_trans *tp = args->trans;
185 int error; 352 struct xfs_mount *mp = tp->t_mountp;
186 xfs_trans_t *tp; 353 struct xfs_da3_icnode_hdr ichdr = {0};
354 struct xfs_buf *bp;
355 int error;
187 356
188 trace_xfs_da_node_create(args); 357 trace_xfs_da_node_create(args);
358 ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
189 359
190 tp = args->trans;
191 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); 360 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
192 if (error) 361 if (error)
193 return(error); 362 return(error);
194 ASSERT(bp != NULL); 363 bp->b_ops = &xfs_da3_node_buf_ops;
364 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
195 node = bp->b_addr; 365 node = bp->b_addr;
196 node->hdr.info.forw = 0;
197 node->hdr.info.back = 0;
198 node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
199 node->hdr.info.pad = 0;
200 node->hdr.count = 0;
201 node->hdr.level = cpu_to_be16(level);
202 366
367 if (xfs_sb_version_hascrc(&mp->m_sb)) {
368 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
369
370 ichdr.magic = XFS_DA3_NODE_MAGIC;
371 hdr3->info.blkno = cpu_to_be64(bp->b_bn);
372 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
373 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid);
374 } else {
375 ichdr.magic = XFS_DA_NODE_MAGIC;
376 }
377 ichdr.level = level;
378
379 xfs_da3_node_hdr_to_disk(node, &ichdr);
203 xfs_trans_log_buf(tp, bp, 380 xfs_trans_log_buf(tp, bp,
204 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); 381 XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
205 382
206 bp->b_ops = &xfs_da_node_buf_ops;
207 *bpp = bp; 383 *bpp = bp;
208 return(0); 384 return(0);
209} 385}
@@ -213,12 +389,18 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
213 * intermediate nodes, rebalance, etc. 389 * intermediate nodes, rebalance, etc.
214 */ 390 */
215int /* error */ 391int /* error */
216xfs_da_split(xfs_da_state_t *state) 392xfs_da3_split(
393 struct xfs_da_state *state)
217{ 394{
218 xfs_da_state_blk_t *oldblk, *newblk, *addblk; 395 struct xfs_da_state_blk *oldblk;
219 xfs_da_intnode_t *node; 396 struct xfs_da_state_blk *newblk;
220 struct xfs_buf *bp; 397 struct xfs_da_state_blk *addblk;
221 int max, action, error, i; 398 struct xfs_da_intnode *node;
399 struct xfs_buf *bp;
400 int max;
401 int action;
402 int error;
403 int i;
222 404
223 trace_xfs_da_split(state->args); 405 trace_xfs_da_split(state->args);
224 406
@@ -246,7 +428,7 @@ xfs_da_split(xfs_da_state_t *state)
246 */ 428 */
247 switch (oldblk->magic) { 429 switch (oldblk->magic) {
248 case XFS_ATTR_LEAF_MAGIC: 430 case XFS_ATTR_LEAF_MAGIC:
249 error = xfs_attr_leaf_split(state, oldblk, newblk); 431 error = xfs_attr3_leaf_split(state, oldblk, newblk);
250 if ((error != 0) && (error != ENOSPC)) { 432 if ((error != 0) && (error != ENOSPC)) {
251 return(error); /* GROT: attr is inconsistent */ 433 return(error); /* GROT: attr is inconsistent */
252 } 434 }
@@ -261,12 +443,12 @@ xfs_da_split(xfs_da_state_t *state)
261 if (state->inleaf) { 443 if (state->inleaf) {
262 state->extraafter = 0; /* before newblk */ 444 state->extraafter = 0; /* before newblk */
263 trace_xfs_attr_leaf_split_before(state->args); 445 trace_xfs_attr_leaf_split_before(state->args);
264 error = xfs_attr_leaf_split(state, oldblk, 446 error = xfs_attr3_leaf_split(state, oldblk,
265 &state->extrablk); 447 &state->extrablk);
266 } else { 448 } else {
267 state->extraafter = 1; /* after newblk */ 449 state->extraafter = 1; /* after newblk */
268 trace_xfs_attr_leaf_split_after(state->args); 450 trace_xfs_attr_leaf_split_after(state->args);
269 error = xfs_attr_leaf_split(state, newblk, 451 error = xfs_attr3_leaf_split(state, newblk,
270 &state->extrablk); 452 &state->extrablk);
271 } 453 }
272 if (error) 454 if (error)
@@ -280,7 +462,7 @@ xfs_da_split(xfs_da_state_t *state)
280 addblk = newblk; 462 addblk = newblk;
281 break; 463 break;
282 case XFS_DA_NODE_MAGIC: 464 case XFS_DA_NODE_MAGIC:
283 error = xfs_da_node_split(state, oldblk, newblk, addblk, 465 error = xfs_da3_node_split(state, oldblk, newblk, addblk,
284 max - i, &action); 466 max - i, &action);
285 addblk->bp = NULL; 467 addblk->bp = NULL;
286 if (error) 468 if (error)
@@ -298,7 +480,7 @@ xfs_da_split(xfs_da_state_t *state)
298 /* 480 /*
299 * Update the btree to show the new hashval for this child. 481 * Update the btree to show the new hashval for this child.
300 */ 482 */
301 xfs_da_fixhashpath(state, &state->path); 483 xfs_da3_fixhashpath(state, &state->path);
302 } 484 }
303 if (!addblk) 485 if (!addblk)
304 return(0); 486 return(0);
@@ -308,7 +490,7 @@ xfs_da_split(xfs_da_state_t *state)
308 */ 490 */
309 ASSERT(state->path.active == 0); 491 ASSERT(state->path.active == 0);
310 oldblk = &state->path.blk[0]; 492 oldblk = &state->path.blk[0];
311 error = xfs_da_root_split(state, oldblk, addblk); 493 error = xfs_da3_root_split(state, oldblk, addblk);
312 if (error) { 494 if (error) {
313 addblk->bp = NULL; 495 addblk->bp = NULL;
314 return(error); /* GROT: dir is inconsistent */ 496 return(error); /* GROT: dir is inconsistent */
@@ -319,8 +501,12 @@ xfs_da_split(xfs_da_state_t *state)
319 * just got bumped because of the addition of a new root node. 501 * just got bumped because of the addition of a new root node.
320 * There might be three blocks involved if a double split occurred, 502 * There might be three blocks involved if a double split occurred,
321 * and the original block 0 could be at any position in the list. 503 * and the original block 0 could be at any position in the list.
504 *
505 * Note: the magic numbers and sibling pointers are in the same
506 * physical place for both v2 and v3 headers (by design). Hence it
507 * doesn't matter which version of the xfs_da_intnode structure we use
508 * here as the result will be the same using either structure.
322 */ 509 */
323
324 node = oldblk->bp->b_addr; 510 node = oldblk->bp->b_addr;
325 if (node->hdr.info.forw) { 511 if (node->hdr.info.forw) {
326 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { 512 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
@@ -359,18 +545,25 @@ xfs_da_split(xfs_da_state_t *state)
359 * the EOF, extending the inode in process. 545 * the EOF, extending the inode in process.
360 */ 546 */
361STATIC int /* error */ 547STATIC int /* error */
362xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, 548xfs_da3_root_split(
363 xfs_da_state_blk_t *blk2) 549 struct xfs_da_state *state,
550 struct xfs_da_state_blk *blk1,
551 struct xfs_da_state_blk *blk2)
364{ 552{
365 xfs_da_intnode_t *node, *oldroot; 553 struct xfs_da_intnode *node;
366 xfs_da_args_t *args; 554 struct xfs_da_intnode *oldroot;
367 xfs_dablk_t blkno; 555 struct xfs_da_node_entry *btree;
368 struct xfs_buf *bp; 556 struct xfs_da3_icnode_hdr nodehdr;
369 int error, size; 557 struct xfs_da_args *args;
370 xfs_inode_t *dp; 558 struct xfs_buf *bp;
371 xfs_trans_t *tp; 559 struct xfs_inode *dp;
372 xfs_mount_t *mp; 560 struct xfs_trans *tp;
373 xfs_dir2_leaf_t *leaf; 561 struct xfs_mount *mp;
562 struct xfs_dir2_leaf *leaf;
563 xfs_dablk_t blkno;
564 int level;
565 int error;
566 int size;
374 567
375 trace_xfs_da_root_split(state->args); 568 trace_xfs_da_root_split(state->args);
376 569
@@ -379,29 +572,65 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
379 * to a free space somewhere. 572 * to a free space somewhere.
380 */ 573 */
381 args = state->args; 574 args = state->args;
382 ASSERT(args != NULL);
383 error = xfs_da_grow_inode(args, &blkno); 575 error = xfs_da_grow_inode(args, &blkno);
384 if (error) 576 if (error)
385 return(error); 577 return error;
578
386 dp = args->dp; 579 dp = args->dp;
387 tp = args->trans; 580 tp = args->trans;
388 mp = state->mp; 581 mp = state->mp;
389 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); 582 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
390 if (error) 583 if (error)
391 return(error); 584 return error;
392 ASSERT(bp != NULL);
393 node = bp->b_addr; 585 node = bp->b_addr;
394 oldroot = blk1->bp->b_addr; 586 oldroot = blk1->bp->b_addr;
395 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { 587 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
396 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - 588 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
397 (char *)oldroot); 589 struct xfs_da3_icnode_hdr nodehdr;
590
591 xfs_da3_node_hdr_from_disk(&nodehdr, oldroot);
592 btree = xfs_da3_node_tree_p(oldroot);
593 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
594 level = nodehdr.level;
595
596 /*
597 * we are about to copy oldroot to bp, so set up the type
598 * of bp while we know exactly what it will be.
599 */
600 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
398 } else { 601 } else {
399 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 602 struct xfs_dir3_icleaf_hdr leafhdr;
603 struct xfs_dir2_leaf_entry *ents;
604
400 leaf = (xfs_dir2_leaf_t *)oldroot; 605 leaf = (xfs_dir2_leaf_t *)oldroot;
401 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - 606 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
402 (char *)leaf); 607 ents = xfs_dir3_leaf_ents_p(leaf);
608
609 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
610 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
611 size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
612 level = 0;
613
614 /*
615 * we are about to copy oldroot to bp, so set up the type
616 * of bp while we know exactly what it will be.
617 */
618 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
403 } 619 }
620
621 /*
622 * we can copy most of the information in the node from one block to
623 * another, but for CRC enabled headers we have to make sure that the
624 * block specific identifiers are kept intact. We update the buffer
625 * directly for this.
626 */
404 memcpy(node, oldroot, size); 627 memcpy(node, oldroot, size);
628 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
629 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
630 struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
631
632 node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
633 }
405 xfs_trans_log_buf(tp, bp, 0, size - 1); 634 xfs_trans_log_buf(tp, bp, 0, size - 1);
406 635
407 bp->b_ops = blk1->bp->b_ops; 636 bp->b_ops = blk1->bp->b_ops;
@@ -411,20 +640,25 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
411 /* 640 /*
412 * Set up the new root node. 641 * Set up the new root node.
413 */ 642 */
414 error = xfs_da_node_create(args, 643 error = xfs_da3_node_create(args,
415 (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0, 644 (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
416 be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); 645 level + 1, &bp, args->whichfork);
417 if (error) 646 if (error)
418 return(error); 647 return error;
648
419 node = bp->b_addr; 649 node = bp->b_addr;
420 node->btree[0].hashval = cpu_to_be32(blk1->hashval); 650 xfs_da3_node_hdr_from_disk(&nodehdr, node);
421 node->btree[0].before = cpu_to_be32(blk1->blkno); 651 btree = xfs_da3_node_tree_p(node);
422 node->btree[1].hashval = cpu_to_be32(blk2->hashval); 652 btree[0].hashval = cpu_to_be32(blk1->hashval);
423 node->btree[1].before = cpu_to_be32(blk2->blkno); 653 btree[0].before = cpu_to_be32(blk1->blkno);
424 node->hdr.count = cpu_to_be16(2); 654 btree[1].hashval = cpu_to_be32(blk2->hashval);
655 btree[1].before = cpu_to_be32(blk2->blkno);
656 nodehdr.count = 2;
657 xfs_da3_node_hdr_to_disk(node, &nodehdr);
425 658
426#ifdef DEBUG 659#ifdef DEBUG
427 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { 660 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
661 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
428 ASSERT(blk1->blkno >= mp->m_dirleafblk && 662 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
429 blk1->blkno < mp->m_dirfreeblk); 663 blk1->blkno < mp->m_dirfreeblk);
430 ASSERT(blk2->blkno >= mp->m_dirleafblk && 664 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -434,30 +668,34 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
434 668
435 /* Header is already logged by xfs_da_node_create */ 669 /* Header is already logged by xfs_da_node_create */
436 xfs_trans_log_buf(tp, bp, 670 xfs_trans_log_buf(tp, bp,
437 XFS_DA_LOGRANGE(node, node->btree, 671 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
438 sizeof(xfs_da_node_entry_t) * 2));
439 672
440 return(0); 673 return 0;
441} 674}
442 675
443/* 676/*
444 * Split the node, rebalance, then add the new entry. 677 * Split the node, rebalance, then add the new entry.
445 */ 678 */
446STATIC int /* error */ 679STATIC int /* error */
447xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, 680xfs_da3_node_split(
448 xfs_da_state_blk_t *newblk, 681 struct xfs_da_state *state,
449 xfs_da_state_blk_t *addblk, 682 struct xfs_da_state_blk *oldblk,
450 int treelevel, int *result) 683 struct xfs_da_state_blk *newblk,
684 struct xfs_da_state_blk *addblk,
685 int treelevel,
686 int *result)
451{ 687{
452 xfs_da_intnode_t *node; 688 struct xfs_da_intnode *node;
453 xfs_dablk_t blkno; 689 struct xfs_da3_icnode_hdr nodehdr;
454 int newcount, error; 690 xfs_dablk_t blkno;
455 int useextra; 691 int newcount;
692 int error;
693 int useextra;
456 694
457 trace_xfs_da_node_split(state->args); 695 trace_xfs_da_node_split(state->args);
458 696
459 node = oldblk->bp->b_addr; 697 node = oldblk->bp->b_addr;
460 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 698 xfs_da3_node_hdr_from_disk(&nodehdr, node);
461 699
462 /* 700 /*
463 * With V2 dirs the extra block is data or freespace. 701 * With V2 dirs the extra block is data or freespace.
@@ -467,7 +705,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
467 /* 705 /*
468 * Do we have to split the node? 706 * Do we have to split the node?
469 */ 707 */
470 if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) { 708 if (nodehdr.count + newcount > state->node_ents) {
471 /* 709 /*
472 * Allocate a new node, add to the doubly linked chain of 710 * Allocate a new node, add to the doubly linked chain of
473 * nodes, then move some of our excess entries into it. 711 * nodes, then move some of our excess entries into it.
@@ -476,14 +714,14 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
476 if (error) 714 if (error)
477 return(error); /* GROT: dir is inconsistent */ 715 return(error); /* GROT: dir is inconsistent */
478 716
479 error = xfs_da_node_create(state->args, blkno, treelevel, 717 error = xfs_da3_node_create(state->args, blkno, treelevel,
480 &newblk->bp, state->args->whichfork); 718 &newblk->bp, state->args->whichfork);
481 if (error) 719 if (error)
482 return(error); /* GROT: dir is inconsistent */ 720 return(error); /* GROT: dir is inconsistent */
483 newblk->blkno = blkno; 721 newblk->blkno = blkno;
484 newblk->magic = XFS_DA_NODE_MAGIC; 722 newblk->magic = XFS_DA_NODE_MAGIC;
485 xfs_da_node_rebalance(state, oldblk, newblk); 723 xfs_da3_node_rebalance(state, oldblk, newblk);
486 error = xfs_da_blk_link(state, oldblk, newblk); 724 error = xfs_da3_blk_link(state, oldblk, newblk);
487 if (error) 725 if (error)
488 return(error); 726 return(error);
489 *result = 1; 727 *result = 1;
@@ -495,7 +733,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
495 * Insert the new entry(s) into the correct block 733 * Insert the new entry(s) into the correct block
496 * (updating last hashval in the process). 734 * (updating last hashval in the process).
497 * 735 *
498 * xfs_da_node_add() inserts BEFORE the given index, 736 * xfs_da3_node_add() inserts BEFORE the given index,
499 * and as a result of using node_lookup_int() we always 737 * and as a result of using node_lookup_int() we always
500 * point to a valid entry (not after one), but a split 738 * point to a valid entry (not after one), but a split
501 * operation always results in a new block whose hashvals 739 * operation always results in a new block whose hashvals
@@ -504,22 +742,23 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
504 * If we had double-split op below us, then add the extra block too. 742 * If we had double-split op below us, then add the extra block too.
505 */ 743 */
506 node = oldblk->bp->b_addr; 744 node = oldblk->bp->b_addr;
507 if (oldblk->index <= be16_to_cpu(node->hdr.count)) { 745 xfs_da3_node_hdr_from_disk(&nodehdr, node);
746 if (oldblk->index <= nodehdr.count) {
508 oldblk->index++; 747 oldblk->index++;
509 xfs_da_node_add(state, oldblk, addblk); 748 xfs_da3_node_add(state, oldblk, addblk);
510 if (useextra) { 749 if (useextra) {
511 if (state->extraafter) 750 if (state->extraafter)
512 oldblk->index++; 751 oldblk->index++;
513 xfs_da_node_add(state, oldblk, &state->extrablk); 752 xfs_da3_node_add(state, oldblk, &state->extrablk);
514 state->extravalid = 0; 753 state->extravalid = 0;
515 } 754 }
516 } else { 755 } else {
517 newblk->index++; 756 newblk->index++;
518 xfs_da_node_add(state, newblk, addblk); 757 xfs_da3_node_add(state, newblk, addblk);
519 if (useextra) { 758 if (useextra) {
520 if (state->extraafter) 759 if (state->extraafter)
521 newblk->index++; 760 newblk->index++;
522 xfs_da_node_add(state, newblk, &state->extrablk); 761 xfs_da3_node_add(state, newblk, &state->extrablk);
523 state->extravalid = 0; 762 state->extravalid = 0;
524 } 763 }
525 } 764 }
@@ -534,33 +773,53 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
534 * NOTE: if blk2 is empty, then it will get the upper half of blk1. 773 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
535 */ 774 */
536STATIC void 775STATIC void
537xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, 776xfs_da3_node_rebalance(
538 xfs_da_state_blk_t *blk2) 777 struct xfs_da_state *state,
778 struct xfs_da_state_blk *blk1,
779 struct xfs_da_state_blk *blk2)
539{ 780{
540 xfs_da_intnode_t *node1, *node2, *tmpnode; 781 struct xfs_da_intnode *node1;
541 xfs_da_node_entry_t *btree_s, *btree_d; 782 struct xfs_da_intnode *node2;
542 int count, tmp; 783 struct xfs_da_intnode *tmpnode;
543 xfs_trans_t *tp; 784 struct xfs_da_node_entry *btree1;
785 struct xfs_da_node_entry *btree2;
786 struct xfs_da_node_entry *btree_s;
787 struct xfs_da_node_entry *btree_d;
788 struct xfs_da3_icnode_hdr nodehdr1;
789 struct xfs_da3_icnode_hdr nodehdr2;
790 struct xfs_trans *tp;
791 int count;
792 int tmp;
793 int swap = 0;
544 794
545 trace_xfs_da_node_rebalance(state->args); 795 trace_xfs_da_node_rebalance(state->args);
546 796
547 node1 = blk1->bp->b_addr; 797 node1 = blk1->bp->b_addr;
548 node2 = blk2->bp->b_addr; 798 node2 = blk2->bp->b_addr;
799 xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
800 xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
801 btree1 = xfs_da3_node_tree_p(node1);
802 btree2 = xfs_da3_node_tree_p(node2);
803
549 /* 804 /*
550 * Figure out how many entries need to move, and in which direction. 805 * Figure out how many entries need to move, and in which direction.
551 * Swap the nodes around if that makes it simpler. 806 * Swap the nodes around if that makes it simpler.
552 */ 807 */
553 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && 808 if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
554 ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || 809 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
555 (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) < 810 (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
556 be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) { 811 be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
557 tmpnode = node1; 812 tmpnode = node1;
558 node1 = node2; 813 node1 = node2;
559 node2 = tmpnode; 814 node2 = tmpnode;
815 xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
816 xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
817 btree1 = xfs_da3_node_tree_p(node1);
818 btree2 = xfs_da3_node_tree_p(node2);
819 swap = 1;
560 } 820 }
561 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 821
562 ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 822 count = (nodehdr1.count - nodehdr2.count) / 2;
563 count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
564 if (count == 0) 823 if (count == 0)
565 return; 824 return;
566 tp = state->args->trans; 825 tp = state->args->trans;
@@ -571,10 +830,11 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
571 /* 830 /*
572 * Move elements in node2 up to make a hole. 831 * Move elements in node2 up to make a hole.
573 */ 832 */
574 if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) { 833 tmp = nodehdr2.count;
834 if (tmp > 0) {
575 tmp *= (uint)sizeof(xfs_da_node_entry_t); 835 tmp *= (uint)sizeof(xfs_da_node_entry_t);
576 btree_s = &node2->btree[0]; 836 btree_s = &btree2[0];
577 btree_d = &node2->btree[count]; 837 btree_d = &btree2[count];
578 memmove(btree_d, btree_s, tmp); 838 memmove(btree_d, btree_s, tmp);
579 } 839 }
580 840
@@ -582,12 +842,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
582 * Move the req'd B-tree elements from high in node1 to 842 * Move the req'd B-tree elements from high in node1 to
583 * low in node2. 843 * low in node2.
584 */ 844 */
585 be16_add_cpu(&node2->hdr.count, count); 845 nodehdr2.count += count;
586 tmp = count * (uint)sizeof(xfs_da_node_entry_t); 846 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
587 btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count]; 847 btree_s = &btree1[nodehdr1.count - count];
588 btree_d = &node2->btree[0]; 848 btree_d = &btree2[0];
589 memcpy(btree_d, btree_s, tmp); 849 memcpy(btree_d, btree_s, tmp);
590 be16_add_cpu(&node1->hdr.count, -count); 850 nodehdr1.count -= count;
591 } else { 851 } else {
592 /* 852 /*
593 * Move the req'd B-tree elements from low in node2 to 853 * Move the req'd B-tree elements from low in node2 to
@@ -595,49 +855,60 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
595 */ 855 */
596 count = -count; 856 count = -count;
597 tmp = count * (uint)sizeof(xfs_da_node_entry_t); 857 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
598 btree_s = &node2->btree[0]; 858 btree_s = &btree2[0];
599 btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; 859 btree_d = &btree1[nodehdr1.count];
600 memcpy(btree_d, btree_s, tmp); 860 memcpy(btree_d, btree_s, tmp);
601 be16_add_cpu(&node1->hdr.count, count); 861 nodehdr1.count += count;
862
602 xfs_trans_log_buf(tp, blk1->bp, 863 xfs_trans_log_buf(tp, blk1->bp,
603 XFS_DA_LOGRANGE(node1, btree_d, tmp)); 864 XFS_DA_LOGRANGE(node1, btree_d, tmp));
604 865
605 /* 866 /*
606 * Move elements in node2 down to fill the hole. 867 * Move elements in node2 down to fill the hole.
607 */ 868 */
608 tmp = be16_to_cpu(node2->hdr.count) - count; 869 tmp = nodehdr2.count - count;
609 tmp *= (uint)sizeof(xfs_da_node_entry_t); 870 tmp *= (uint)sizeof(xfs_da_node_entry_t);
610 btree_s = &node2->btree[count]; 871 btree_s = &btree2[count];
611 btree_d = &node2->btree[0]; 872 btree_d = &btree2[0];
612 memmove(btree_d, btree_s, tmp); 873 memmove(btree_d, btree_s, tmp);
613 be16_add_cpu(&node2->hdr.count, -count); 874 nodehdr2.count -= count;
614 } 875 }
615 876
616 /* 877 /*
617 * Log header of node 1 and all current bits of node 2. 878 * Log header of node 1 and all current bits of node 2.
618 */ 879 */
880 xfs_da3_node_hdr_to_disk(node1, &nodehdr1);
619 xfs_trans_log_buf(tp, blk1->bp, 881 xfs_trans_log_buf(tp, blk1->bp,
620 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr))); 882 XFS_DA_LOGRANGE(node1, &node1->hdr,
883 xfs_da3_node_hdr_size(node1)));
884
885 xfs_da3_node_hdr_to_disk(node2, &nodehdr2);
621 xfs_trans_log_buf(tp, blk2->bp, 886 xfs_trans_log_buf(tp, blk2->bp,
622 XFS_DA_LOGRANGE(node2, &node2->hdr, 887 XFS_DA_LOGRANGE(node2, &node2->hdr,
623 sizeof(node2->hdr) + 888 xfs_da3_node_hdr_size(node2) +
624 sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count))); 889 (sizeof(btree2[0]) * nodehdr2.count)));
625 890
626 /* 891 /*
627 * Record the last hashval from each block for upward propagation. 892 * Record the last hashval from each block for upward propagation.
628 * (note: don't use the swapped node pointers) 893 * (note: don't use the swapped node pointers)
629 */ 894 */
630 node1 = blk1->bp->b_addr; 895 if (swap) {
631 node2 = blk2->bp->b_addr; 896 node1 = blk1->bp->b_addr;
632 blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval); 897 node2 = blk2->bp->b_addr;
633 blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval); 898 xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
899 xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
900 btree1 = xfs_da3_node_tree_p(node1);
901 btree2 = xfs_da3_node_tree_p(node2);
902 }
903 blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
904 blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
634 905
635 /* 906 /*
636 * Adjust the expected index for insertion. 907 * Adjust the expected index for insertion.
637 */ 908 */
638 if (blk1->index >= be16_to_cpu(node1->hdr.count)) { 909 if (blk1->index >= nodehdr1.count) {
639 blk2->index = blk1->index - be16_to_cpu(node1->hdr.count); 910 blk2->index = blk1->index - nodehdr1.count;
640 blk1->index = be16_to_cpu(node1->hdr.count) + 1; /* make it invalid */ 911 blk1->index = nodehdr1.count + 1; /* make it invalid */
641 } 912 }
642} 913}
643 914
@@ -645,18 +916,23 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
645 * Add a new entry to an intermediate node. 916 * Add a new entry to an intermediate node.
646 */ 917 */
647STATIC void 918STATIC void
648xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, 919xfs_da3_node_add(
649 xfs_da_state_blk_t *newblk) 920 struct xfs_da_state *state,
921 struct xfs_da_state_blk *oldblk,
922 struct xfs_da_state_blk *newblk)
650{ 923{
651 xfs_da_intnode_t *node; 924 struct xfs_da_intnode *node;
652 xfs_da_node_entry_t *btree; 925 struct xfs_da3_icnode_hdr nodehdr;
653 int tmp; 926 struct xfs_da_node_entry *btree;
927 int tmp;
654 928
655 trace_xfs_da_node_add(state->args); 929 trace_xfs_da_node_add(state->args);
656 930
657 node = oldblk->bp->b_addr; 931 node = oldblk->bp->b_addr;
658 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 932 xfs_da3_node_hdr_from_disk(&nodehdr, node);
659 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 933 btree = xfs_da3_node_tree_p(node);
934
935 ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
660 ASSERT(newblk->blkno != 0); 936 ASSERT(newblk->blkno != 0);
661 if (state->args->whichfork == XFS_DATA_FORK) 937 if (state->args->whichfork == XFS_DATA_FORK)
662 ASSERT(newblk->blkno >= state->mp->m_dirleafblk && 938 ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
@@ -666,23 +942,25 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
666 * We may need to make some room before we insert the new node. 942 * We may need to make some room before we insert the new node.
667 */ 943 */
668 tmp = 0; 944 tmp = 0;
669 btree = &node->btree[ oldblk->index ]; 945 if (oldblk->index < nodehdr.count) {
670 if (oldblk->index < be16_to_cpu(node->hdr.count)) { 946 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
671 tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree); 947 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
672 memmove(btree + 1, btree, tmp);
673 } 948 }
674 btree->hashval = cpu_to_be32(newblk->hashval); 949 btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
675 btree->before = cpu_to_be32(newblk->blkno); 950 btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
676 xfs_trans_log_buf(state->args->trans, oldblk->bp, 951 xfs_trans_log_buf(state->args->trans, oldblk->bp,
677 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); 952 XFS_DA_LOGRANGE(node, &btree[oldblk->index],
678 be16_add_cpu(&node->hdr.count, 1); 953 tmp + sizeof(*btree)));
954
955 nodehdr.count += 1;
956 xfs_da3_node_hdr_to_disk(node, &nodehdr);
679 xfs_trans_log_buf(state->args->trans, oldblk->bp, 957 xfs_trans_log_buf(state->args->trans, oldblk->bp,
680 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); 958 XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
681 959
682 /* 960 /*
683 * Copy the last hash value from the oldblk to propagate upwards. 961 * Copy the last hash value from the oldblk to propagate upwards.
684 */ 962 */
685 oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval); 963 oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
686} 964}
687 965
688/*======================================================================== 966/*========================================================================
@@ -694,14 +972,16 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
694 * possibly deallocating that block, etc... 972 * possibly deallocating that block, etc...
695 */ 973 */
696int 974int
697xfs_da_join(xfs_da_state_t *state) 975xfs_da3_join(
976 struct xfs_da_state *state)
698{ 977{
699 xfs_da_state_blk_t *drop_blk, *save_blk; 978 struct xfs_da_state_blk *drop_blk;
700 int action, error; 979 struct xfs_da_state_blk *save_blk;
980 int action = 0;
981 int error;
701 982
702 trace_xfs_da_join(state->args); 983 trace_xfs_da_join(state->args);
703 984
704 action = 0;
705 drop_blk = &state->path.blk[ state->path.active-1 ]; 985 drop_blk = &state->path.blk[ state->path.active-1 ];
706 save_blk = &state->altpath.blk[ state->path.active-1 ]; 986 save_blk = &state->altpath.blk[ state->path.active-1 ];
707 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); 987 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
@@ -722,12 +1002,12 @@ xfs_da_join(xfs_da_state_t *state)
722 */ 1002 */
723 switch (drop_blk->magic) { 1003 switch (drop_blk->magic) {
724 case XFS_ATTR_LEAF_MAGIC: 1004 case XFS_ATTR_LEAF_MAGIC:
725 error = xfs_attr_leaf_toosmall(state, &action); 1005 error = xfs_attr3_leaf_toosmall(state, &action);
726 if (error) 1006 if (error)
727 return(error); 1007 return(error);
728 if (action == 0) 1008 if (action == 0)
729 return(0); 1009 return(0);
730 xfs_attr_leaf_unbalance(state, drop_blk, save_blk); 1010 xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
731 break; 1011 break;
732 case XFS_DIR2_LEAFN_MAGIC: 1012 case XFS_DIR2_LEAFN_MAGIC:
733 error = xfs_dir2_leafn_toosmall(state, &action); 1013 error = xfs_dir2_leafn_toosmall(state, &action);
@@ -742,18 +1022,18 @@ xfs_da_join(xfs_da_state_t *state)
742 * Remove the offending node, fixup hashvals, 1022 * Remove the offending node, fixup hashvals,
743 * check for a toosmall neighbor. 1023 * check for a toosmall neighbor.
744 */ 1024 */
745 xfs_da_node_remove(state, drop_blk); 1025 xfs_da3_node_remove(state, drop_blk);
746 xfs_da_fixhashpath(state, &state->path); 1026 xfs_da3_fixhashpath(state, &state->path);
747 error = xfs_da_node_toosmall(state, &action); 1027 error = xfs_da3_node_toosmall(state, &action);
748 if (error) 1028 if (error)
749 return(error); 1029 return(error);
750 if (action == 0) 1030 if (action == 0)
751 return 0; 1031 return 0;
752 xfs_da_node_unbalance(state, drop_blk, save_blk); 1032 xfs_da3_node_unbalance(state, drop_blk, save_blk);
753 break; 1033 break;
754 } 1034 }
755 xfs_da_fixhashpath(state, &state->altpath); 1035 xfs_da3_fixhashpath(state, &state->altpath);
756 error = xfs_da_blk_unlink(state, drop_blk, save_blk); 1036 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
757 xfs_da_state_kill_altpath(state); 1037 xfs_da_state_kill_altpath(state);
758 if (error) 1038 if (error)
759 return(error); 1039 return(error);
@@ -768,9 +1048,9 @@ xfs_da_join(xfs_da_state_t *state)
768 * we only have one entry in the root, make the child block 1048 * we only have one entry in the root, make the child block
769 * the new root. 1049 * the new root.
770 */ 1050 */
771 xfs_da_node_remove(state, drop_blk); 1051 xfs_da3_node_remove(state, drop_blk);
772 xfs_da_fixhashpath(state, &state->path); 1052 xfs_da3_fixhashpath(state, &state->path);
773 error = xfs_da_root_join(state, &state->path.blk[0]); 1053 error = xfs_da3_root_join(state, &state->path.blk[0]);
774 return(error); 1054 return(error);
775} 1055}
776 1056
@@ -782,9 +1062,13 @@ xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
782 1062
783 if (level == 1) { 1063 if (level == 1) {
784 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1064 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
785 magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1065 magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
786 } else 1066 magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
787 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1067 magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1068 } else {
1069 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1070 magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1071 }
788 ASSERT(!blkinfo->forw); 1072 ASSERT(!blkinfo->forw);
789 ASSERT(!blkinfo->back); 1073 ASSERT(!blkinfo->back);
790} 1074}
@@ -797,52 +1081,61 @@ xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
797 * the old root to block 0 as the new root node. 1081 * the old root to block 0 as the new root node.
798 */ 1082 */
799STATIC int 1083STATIC int
800xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) 1084xfs_da3_root_join(
1085 struct xfs_da_state *state,
1086 struct xfs_da_state_blk *root_blk)
801{ 1087{
802 xfs_da_intnode_t *oldroot; 1088 struct xfs_da_intnode *oldroot;
803 xfs_da_args_t *args; 1089 struct xfs_da_args *args;
804 xfs_dablk_t child; 1090 xfs_dablk_t child;
805 struct xfs_buf *bp; 1091 struct xfs_buf *bp;
806 int error; 1092 struct xfs_da3_icnode_hdr oldroothdr;
1093 struct xfs_da_node_entry *btree;
1094 int error;
807 1095
808 trace_xfs_da_root_join(state->args); 1096 trace_xfs_da_root_join(state->args);
809 1097
810 args = state->args;
811 ASSERT(args != NULL);
812 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); 1098 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1099
1100 args = state->args;
813 oldroot = root_blk->bp->b_addr; 1101 oldroot = root_blk->bp->b_addr;
814 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1102 xfs_da3_node_hdr_from_disk(&oldroothdr, oldroot);
815 ASSERT(!oldroot->hdr.info.forw); 1103 ASSERT(oldroothdr.forw == 0);
816 ASSERT(!oldroot->hdr.info.back); 1104 ASSERT(oldroothdr.back == 0);
817 1105
818 /* 1106 /*
819 * If the root has more than one child, then don't do anything. 1107 * If the root has more than one child, then don't do anything.
820 */ 1108 */
821 if (be16_to_cpu(oldroot->hdr.count) > 1) 1109 if (oldroothdr.count > 1)
822 return(0); 1110 return 0;
823 1111
824 /* 1112 /*
825 * Read in the (only) child block, then copy those bytes into 1113 * Read in the (only) child block, then copy those bytes into
826 * the root block's buffer and free the original child block. 1114 * the root block's buffer and free the original child block.
827 */ 1115 */
828 child = be32_to_cpu(oldroot->btree[0].before); 1116 btree = xfs_da3_node_tree_p(oldroot);
1117 child = be32_to_cpu(btree[0].before);
829 ASSERT(child != 0); 1118 ASSERT(child != 0);
830 error = xfs_da_node_read(args->trans, args->dp, child, -1, &bp, 1119 error = xfs_da3_node_read(args->trans, args->dp, child, -1, &bp,
831 args->whichfork); 1120 args->whichfork);
832 if (error) 1121 if (error)
833 return(error); 1122 return error;
834 ASSERT(bp != NULL); 1123 xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
835 xfs_da_blkinfo_onlychild_validate(bp->b_addr,
836 be16_to_cpu(oldroot->hdr.level));
837 1124
838 /* 1125 /*
839 * This could be copying a leaf back into the root block in the case of 1126 * This could be copying a leaf back into the root block in the case of
840 * there only being a single leaf block left in the tree. Hence we have 1127 * there only being a single leaf block left in the tree. Hence we have
841 * to update the b_ops pointer as well to match the buffer type change 1128 * to update the b_ops pointer as well to match the buffer type change
842 * that could occur. 1129 * that could occur. For dir3 blocks we also need to update the block
1130 * number in the buffer header.
843 */ 1131 */
844 memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); 1132 memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
845 root_blk->bp->b_ops = bp->b_ops; 1133 root_blk->bp->b_ops = bp->b_ops;
1134 xfs_trans_buf_copy_type(root_blk->bp, bp);
1135 if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
1136 struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
1137 da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
1138 }
846 xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); 1139 xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
847 error = xfs_da_shrink_inode(args, child, bp); 1140 error = xfs_da_shrink_inode(args, child, bp);
848 return(error); 1141 return(error);
@@ -858,14 +1151,21 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
858 * If nothing can be done, return 0. 1151 * If nothing can be done, return 0.
859 */ 1152 */
860STATIC int 1153STATIC int
861xfs_da_node_toosmall(xfs_da_state_t *state, int *action) 1154xfs_da3_node_toosmall(
1155 struct xfs_da_state *state,
1156 int *action)
862{ 1157{
863 xfs_da_intnode_t *node; 1158 struct xfs_da_intnode *node;
864 xfs_da_state_blk_t *blk; 1159 struct xfs_da_state_blk *blk;
865 xfs_da_blkinfo_t *info; 1160 struct xfs_da_blkinfo *info;
866 int count, forward, error, retval, i; 1161 xfs_dablk_t blkno;
867 xfs_dablk_t blkno; 1162 struct xfs_buf *bp;
868 struct xfs_buf *bp; 1163 struct xfs_da3_icnode_hdr nodehdr;
1164 int count;
1165 int forward;
1166 int error;
1167 int retval;
1168 int i;
869 1169
870 trace_xfs_da_node_toosmall(state->args); 1170 trace_xfs_da_node_toosmall(state->args);
871 1171
@@ -876,10 +1176,9 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
876 */ 1176 */
877 blk = &state->path.blk[ state->path.active-1 ]; 1177 blk = &state->path.blk[ state->path.active-1 ];
878 info = blk->bp->b_addr; 1178 info = blk->bp->b_addr;
879 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
880 node = (xfs_da_intnode_t *)info; 1179 node = (xfs_da_intnode_t *)info;
881 count = be16_to_cpu(node->hdr.count); 1180 xfs_da3_node_hdr_from_disk(&nodehdr, node);
882 if (count > (state->node_ents >> 1)) { 1181 if (nodehdr.count > (state->node_ents >> 1)) {
883 *action = 0; /* blk over 50%, don't try to join */ 1182 *action = 0; /* blk over 50%, don't try to join */
884 return(0); /* blk over 50%, don't try to join */ 1183 return(0); /* blk over 50%, don't try to join */
885 } 1184 }
@@ -890,14 +1189,14 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
890 * coalesce it with a sibling block. We choose (arbitrarily) 1189 * coalesce it with a sibling block. We choose (arbitrarily)
891 * to merge with the forward block unless it is NULL. 1190 * to merge with the forward block unless it is NULL.
892 */ 1191 */
893 if (count == 0) { 1192 if (nodehdr.count == 0) {
894 /* 1193 /*
895 * Make altpath point to the block we want to keep and 1194 * Make altpath point to the block we want to keep and
896 * path point to the block we want to drop (this one). 1195 * path point to the block we want to drop (this one).
897 */ 1196 */
898 forward = (info->forw != 0); 1197 forward = (info->forw != 0);
899 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1198 memcpy(&state->altpath, &state->path, sizeof(state->path));
900 error = xfs_da_path_shift(state, &state->altpath, forward, 1199 error = xfs_da3_path_shift(state, &state->altpath, forward,
901 0, &retval); 1200 0, &retval);
902 if (error) 1201 if (error)
903 return(error); 1202 return(error);
@@ -916,35 +1215,34 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
916 * We prefer coalescing with the lower numbered sibling so as 1215 * We prefer coalescing with the lower numbered sibling so as
917 * to shrink a directory over time. 1216 * to shrink a directory over time.
918 */ 1217 */
1218 count = state->node_ents;
1219 count -= state->node_ents >> 2;
1220 count -= nodehdr.count;
1221
919 /* start with smaller blk num */ 1222 /* start with smaller blk num */
920 forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); 1223 forward = nodehdr.forw < nodehdr.back;
921 for (i = 0; i < 2; forward = !forward, i++) { 1224 for (i = 0; i < 2; forward = !forward, i++) {
922 if (forward) 1225 if (forward)
923 blkno = be32_to_cpu(info->forw); 1226 blkno = nodehdr.forw;
924 else 1227 else
925 blkno = be32_to_cpu(info->back); 1228 blkno = nodehdr.back;
926 if (blkno == 0) 1229 if (blkno == 0)
927 continue; 1230 continue;
928 error = xfs_da_node_read(state->args->trans, state->args->dp, 1231 error = xfs_da3_node_read(state->args->trans, state->args->dp,
929 blkno, -1, &bp, state->args->whichfork); 1232 blkno, -1, &bp, state->args->whichfork);
930 if (error) 1233 if (error)
931 return(error); 1234 return(error);
932 ASSERT(bp != NULL);
933 1235
934 node = (xfs_da_intnode_t *)info;
935 count = state->node_ents;
936 count -= state->node_ents >> 2;
937 count -= be16_to_cpu(node->hdr.count);
938 node = bp->b_addr; 1236 node = bp->b_addr;
939 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1237 xfs_da3_node_hdr_from_disk(&nodehdr, node);
940 count -= be16_to_cpu(node->hdr.count);
941 xfs_trans_brelse(state->args->trans, bp); 1238 xfs_trans_brelse(state->args->trans, bp);
942 if (count >= 0) 1239
1240 if (count - nodehdr.count >= 0)
943 break; /* fits with at least 25% to spare */ 1241 break; /* fits with at least 25% to spare */
944 } 1242 }
945 if (i >= 2) { 1243 if (i >= 2) {
946 *action = 0; 1244 *action = 0;
947 return(0); 1245 return 0;
948 } 1246 }
949 1247
950 /* 1248 /*
@@ -953,28 +1251,42 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
953 */ 1251 */
954 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1252 memcpy(&state->altpath, &state->path, sizeof(state->path));
955 if (blkno < blk->blkno) { 1253 if (blkno < blk->blkno) {
956 error = xfs_da_path_shift(state, &state->altpath, forward, 1254 error = xfs_da3_path_shift(state, &state->altpath, forward,
957 0, &retval); 1255 0, &retval);
958 if (error) {
959 return(error);
960 }
961 if (retval) {
962 *action = 0;
963 return(0);
964 }
965 } else { 1256 } else {
966 error = xfs_da_path_shift(state, &state->path, forward, 1257 error = xfs_da3_path_shift(state, &state->path, forward,
967 0, &retval); 1258 0, &retval);
968 if (error) { 1259 }
969 return(error); 1260 if (error)
970 } 1261 return error;
971 if (retval) { 1262 if (retval) {
972 *action = 0; 1263 *action = 0;
973 return(0); 1264 return 0;
974 }
975 } 1265 }
976 *action = 1; 1266 *action = 1;
977 return(0); 1267 return 0;
1268}
1269
1270/*
1271 * Pick up the last hashvalue from an intermediate node.
1272 */
1273STATIC uint
1274xfs_da3_node_lasthash(
1275 struct xfs_buf *bp,
1276 int *count)
1277{
1278 struct xfs_da_intnode *node;
1279 struct xfs_da_node_entry *btree;
1280 struct xfs_da3_icnode_hdr nodehdr;
1281
1282 node = bp->b_addr;
1283 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1284 if (count)
1285 *count = nodehdr.count;
1286 if (!nodehdr.count)
1287 return 0;
1288 btree = xfs_da3_node_tree_p(node);
1289 return be32_to_cpu(btree[nodehdr.count - 1].hashval);
978} 1290}
979 1291
980/* 1292/*
@@ -982,13 +1294,16 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
982 * when we stop making changes, return. 1294 * when we stop making changes, return.
983 */ 1295 */
984void 1296void
985xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) 1297xfs_da3_fixhashpath(
1298 struct xfs_da_state *state,
1299 struct xfs_da_state_path *path)
986{ 1300{
987 xfs_da_state_blk_t *blk; 1301 struct xfs_da_state_blk *blk;
988 xfs_da_intnode_t *node; 1302 struct xfs_da_intnode *node;
989 xfs_da_node_entry_t *btree; 1303 struct xfs_da_node_entry *btree;
990 xfs_dahash_t lasthash=0; 1304 xfs_dahash_t lasthash=0;
991 int level, count; 1305 int level;
1306 int count;
992 1307
993 trace_xfs_da_fixhashpath(state->args); 1308 trace_xfs_da_fixhashpath(state->args);
994 1309
@@ -1006,23 +1321,26 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
1006 return; 1321 return;
1007 break; 1322 break;
1008 case XFS_DA_NODE_MAGIC: 1323 case XFS_DA_NODE_MAGIC:
1009 lasthash = xfs_da_node_lasthash(blk->bp, &count); 1324 lasthash = xfs_da3_node_lasthash(blk->bp, &count);
1010 if (count == 0) 1325 if (count == 0)
1011 return; 1326 return;
1012 break; 1327 break;
1013 } 1328 }
1014 for (blk--, level--; level >= 0; blk--, level--) { 1329 for (blk--, level--; level >= 0; blk--, level--) {
1330 struct xfs_da3_icnode_hdr nodehdr;
1331
1015 node = blk->bp->b_addr; 1332 node = blk->bp->b_addr;
1016 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1333 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1017 btree = &node->btree[ blk->index ]; 1334 btree = xfs_da3_node_tree_p(node);
1018 if (be32_to_cpu(btree->hashval) == lasthash) 1335 if (be32_to_cpu(btree->hashval) == lasthash)
1019 break; 1336 break;
1020 blk->hashval = lasthash; 1337 blk->hashval = lasthash;
1021 btree->hashval = cpu_to_be32(lasthash); 1338 btree[blk->index].hashval = cpu_to_be32(lasthash);
1022 xfs_trans_log_buf(state->args->trans, blk->bp, 1339 xfs_trans_log_buf(state->args->trans, blk->bp,
1023 XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); 1340 XFS_DA_LOGRANGE(node, &btree[blk->index],
1341 sizeof(*btree)));
1024 1342
1025 lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1343 lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1026 } 1344 }
1027} 1345}
1028 1346
@@ -1030,104 +1348,120 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
1030 * Remove an entry from an intermediate node. 1348 * Remove an entry from an intermediate node.
1031 */ 1349 */
1032STATIC void 1350STATIC void
1033xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) 1351xfs_da3_node_remove(
1352 struct xfs_da_state *state,
1353 struct xfs_da_state_blk *drop_blk)
1034{ 1354{
1035 xfs_da_intnode_t *node; 1355 struct xfs_da_intnode *node;
1036 xfs_da_node_entry_t *btree; 1356 struct xfs_da3_icnode_hdr nodehdr;
1037 int tmp; 1357 struct xfs_da_node_entry *btree;
1358 int index;
1359 int tmp;
1038 1360
1039 trace_xfs_da_node_remove(state->args); 1361 trace_xfs_da_node_remove(state->args);
1040 1362
1041 node = drop_blk->bp->b_addr; 1363 node = drop_blk->bp->b_addr;
1042 ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); 1364 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1365 ASSERT(drop_blk->index < nodehdr.count);
1043 ASSERT(drop_blk->index >= 0); 1366 ASSERT(drop_blk->index >= 0);
1044 1367
1045 /* 1368 /*
1046 * Copy over the offending entry, or just zero it out. 1369 * Copy over the offending entry, or just zero it out.
1047 */ 1370 */
1048 btree = &node->btree[drop_blk->index]; 1371 index = drop_blk->index;
1049 if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) { 1372 btree = xfs_da3_node_tree_p(node);
1050 tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1; 1373 if (index < nodehdr.count - 1) {
1374 tmp = nodehdr.count - index - 1;
1051 tmp *= (uint)sizeof(xfs_da_node_entry_t); 1375 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1052 memmove(btree, btree + 1, tmp); 1376 memmove(&btree[index], &btree[index + 1], tmp);
1053 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1377 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1054 XFS_DA_LOGRANGE(node, btree, tmp)); 1378 XFS_DA_LOGRANGE(node, &btree[index], tmp));
1055 btree = &node->btree[be16_to_cpu(node->hdr.count)-1]; 1379 index = nodehdr.count - 1;
1056 } 1380 }
1057 memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); 1381 memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1058 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1382 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1059 XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); 1383 XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1060 be16_add_cpu(&node->hdr.count, -1); 1384 nodehdr.count -= 1;
1385 xfs_da3_node_hdr_to_disk(node, &nodehdr);
1061 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1386 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1062 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); 1387 XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
1063 1388
1064 /* 1389 /*
1065 * Copy the last hash value from the block to propagate upwards. 1390 * Copy the last hash value from the block to propagate upwards.
1066 */ 1391 */
1067 btree--; 1392 drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1068 drop_blk->hashval = be32_to_cpu(btree->hashval);
1069} 1393}
1070 1394
1071/* 1395/*
1072 * Unbalance the btree elements between two intermediate nodes, 1396 * Unbalance the elements between two intermediate nodes,
1073 * move all Btree elements from one node into another. 1397 * move all Btree elements from one node into another.
1074 */ 1398 */
1075STATIC void 1399STATIC void
1076xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 1400xfs_da3_node_unbalance(
1077 xfs_da_state_blk_t *save_blk) 1401 struct xfs_da_state *state,
1402 struct xfs_da_state_blk *drop_blk,
1403 struct xfs_da_state_blk *save_blk)
1078{ 1404{
1079 xfs_da_intnode_t *drop_node, *save_node; 1405 struct xfs_da_intnode *drop_node;
1080 xfs_da_node_entry_t *btree; 1406 struct xfs_da_intnode *save_node;
1081 int tmp; 1407 struct xfs_da_node_entry *drop_btree;
1082 xfs_trans_t *tp; 1408 struct xfs_da_node_entry *save_btree;
1409 struct xfs_da3_icnode_hdr drop_hdr;
1410 struct xfs_da3_icnode_hdr save_hdr;
1411 struct xfs_trans *tp;
1412 int sindex;
1413 int tmp;
1083 1414
1084 trace_xfs_da_node_unbalance(state->args); 1415 trace_xfs_da_node_unbalance(state->args);
1085 1416
1086 drop_node = drop_blk->bp->b_addr; 1417 drop_node = drop_blk->bp->b_addr;
1087 save_node = save_blk->bp->b_addr; 1418 save_node = save_blk->bp->b_addr;
1088 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1419 xfs_da3_node_hdr_from_disk(&drop_hdr, drop_node);
1089 ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1420 xfs_da3_node_hdr_from_disk(&save_hdr, save_node);
1421 drop_btree = xfs_da3_node_tree_p(drop_node);
1422 save_btree = xfs_da3_node_tree_p(save_node);
1090 tp = state->args->trans; 1423 tp = state->args->trans;
1091 1424
1092 /* 1425 /*
1093 * If the dying block has lower hashvals, then move all the 1426 * If the dying block has lower hashvals, then move all the
1094 * elements in the remaining block up to make a hole. 1427 * elements in the remaining block up to make a hole.
1095 */ 1428 */
1096 if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) || 1429 if ((be32_to_cpu(drop_btree[0].hashval) <
1097 (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) < 1430 be32_to_cpu(save_btree[0].hashval)) ||
1098 be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval))) 1431 (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1099 { 1432 be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1100 btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)]; 1433 /* XXX: check this - is memmove dst correct? */
1101 tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); 1434 tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1102 memmove(btree, &save_node->btree[0], tmp); 1435 memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1103 btree = &save_node->btree[0]; 1436
1437 sindex = 0;
1104 xfs_trans_log_buf(tp, save_blk->bp, 1438 xfs_trans_log_buf(tp, save_blk->bp,
1105 XFS_DA_LOGRANGE(save_node, btree, 1439 XFS_DA_LOGRANGE(save_node, &save_btree[0],
1106 (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) * 1440 (save_hdr.count + drop_hdr.count) *
1107 sizeof(xfs_da_node_entry_t))); 1441 sizeof(xfs_da_node_entry_t)));
1108 } else { 1442 } else {
1109 btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)]; 1443 sindex = save_hdr.count;
1110 xfs_trans_log_buf(tp, save_blk->bp, 1444 xfs_trans_log_buf(tp, save_blk->bp,
1111 XFS_DA_LOGRANGE(save_node, btree, 1445 XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1112 be16_to_cpu(drop_node->hdr.count) * 1446 drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1113 sizeof(xfs_da_node_entry_t)));
1114 } 1447 }
1115 1448
1116 /* 1449 /*
1117 * Move all the B-tree elements from drop_blk to save_blk. 1450 * Move all the B-tree elements from drop_blk to save_blk.
1118 */ 1451 */
1119 tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); 1452 tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1120 memcpy(btree, &drop_node->btree[0], tmp); 1453 memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1121 be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); 1454 save_hdr.count += drop_hdr.count;
1122 1455
1456 xfs_da3_node_hdr_to_disk(save_node, &save_hdr);
1123 xfs_trans_log_buf(tp, save_blk->bp, 1457 xfs_trans_log_buf(tp, save_blk->bp,
1124 XFS_DA_LOGRANGE(save_node, &save_node->hdr, 1458 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1125 sizeof(save_node->hdr))); 1459 xfs_da3_node_hdr_size(save_node)));
1126 1460
1127 /* 1461 /*
1128 * Save the last hashval in the remaining block for upward propagation. 1462 * Save the last hashval in the remaining block for upward propagation.
1129 */ 1463 */
1130 save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval); 1464 save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1131} 1465}
1132 1466
1133/*======================================================================== 1467/*========================================================================
@@ -1146,16 +1480,24 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1146 * pruned depth-first tree search. 1480 * pruned depth-first tree search.
1147 */ 1481 */
1148int /* error */ 1482int /* error */
1149xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) 1483xfs_da3_node_lookup_int(
1484 struct xfs_da_state *state,
1485 int *result)
1150{ 1486{
1151 xfs_da_state_blk_t *blk; 1487 struct xfs_da_state_blk *blk;
1152 xfs_da_blkinfo_t *curr; 1488 struct xfs_da_blkinfo *curr;
1153 xfs_da_intnode_t *node; 1489 struct xfs_da_intnode *node;
1154 xfs_da_node_entry_t *btree; 1490 struct xfs_da_node_entry *btree;
1155 xfs_dablk_t blkno; 1491 struct xfs_da3_icnode_hdr nodehdr;
1156 int probe, span, max, error, retval; 1492 struct xfs_da_args *args;
1157 xfs_dahash_t hashval, btreehashval; 1493 xfs_dablk_t blkno;
1158 xfs_da_args_t *args; 1494 xfs_dahash_t hashval;
1495 xfs_dahash_t btreehashval;
1496 int probe;
1497 int span;
1498 int max;
1499 int error;
1500 int retval;
1159 1501
1160 args = state->args; 1502 args = state->args;
1161 1503
@@ -1171,7 +1513,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1171 * Read the next node down in the tree. 1513 * Read the next node down in the tree.
1172 */ 1514 */
1173 blk->blkno = blkno; 1515 blk->blkno = blkno;
1174 error = xfs_da_node_read(args->trans, args->dp, blkno, 1516 error = xfs_da3_node_read(args->trans, args->dp, blkno,
1175 -1, &blk->bp, args->whichfork); 1517 -1, &blk->bp, args->whichfork);
1176 if (error) { 1518 if (error) {
1177 blk->blkno = 0; 1519 blk->blkno = 0;
@@ -1180,66 +1522,75 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1180 } 1522 }
1181 curr = blk->bp->b_addr; 1523 curr = blk->bp->b_addr;
1182 blk->magic = be16_to_cpu(curr->magic); 1524 blk->magic = be16_to_cpu(curr->magic);
1183 ASSERT(blk->magic == XFS_DA_NODE_MAGIC || 1525
1184 blk->magic == XFS_DIR2_LEAFN_MAGIC || 1526 if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
1185 blk->magic == XFS_ATTR_LEAF_MAGIC); 1527 blk->magic == XFS_ATTR3_LEAF_MAGIC) {
1528 blk->magic = XFS_ATTR_LEAF_MAGIC;
1529 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1530 break;
1531 }
1532
1533 if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1534 blk->magic == XFS_DIR3_LEAFN_MAGIC) {
1535 blk->magic = XFS_DIR2_LEAFN_MAGIC;
1536 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1537 break;
1538 }
1539
1540 blk->magic = XFS_DA_NODE_MAGIC;
1541
1186 1542
1187 /* 1543 /*
1188 * Search an intermediate node for a match. 1544 * Search an intermediate node for a match.
1189 */ 1545 */
1190 if (blk->magic == XFS_DA_NODE_MAGIC) { 1546 node = blk->bp->b_addr;
1191 node = blk->bp->b_addr; 1547 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1192 max = be16_to_cpu(node->hdr.count); 1548 btree = xfs_da3_node_tree_p(node);
1193 blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
1194 1549
1195 /* 1550 max = nodehdr.count;
1196 * Binary search. (note: small blocks will skip loop) 1551 blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1197 */
1198 probe = span = max / 2;
1199 hashval = args->hashval;
1200 for (btree = &node->btree[probe]; span > 4;
1201 btree = &node->btree[probe]) {
1202 span /= 2;
1203 btreehashval = be32_to_cpu(btree->hashval);
1204 if (btreehashval < hashval)
1205 probe += span;
1206 else if (btreehashval > hashval)
1207 probe -= span;
1208 else
1209 break;
1210 }
1211 ASSERT((probe >= 0) && (probe < max));
1212 ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));
1213 1552
1214 /* 1553 /*
1215 * Since we may have duplicate hashval's, find the first 1554 * Binary search. (note: small blocks will skip loop)
1216 * matching hashval in the node. 1555 */
1217 */ 1556 probe = span = max / 2;
1218 while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) { 1557 hashval = args->hashval;
1219 btree--; 1558 while (span > 4) {
1220 probe--; 1559 span /= 2;
1221 } 1560 btreehashval = be32_to_cpu(btree[probe].hashval);
1222 while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) { 1561 if (btreehashval < hashval)
1223 btree++; 1562 probe += span;
1224 probe++; 1563 else if (btreehashval > hashval)
1225 } 1564 probe -= span;
1565 else
1566 break;
1567 }
1568 ASSERT((probe >= 0) && (probe < max));
1569 ASSERT((span <= 4) ||
1570 (be32_to_cpu(btree[probe].hashval) == hashval));
1226 1571
1227 /* 1572 /*
1228 * Pick the right block to descend on. 1573 * Since we may have duplicate hashval's, find the first
1229 */ 1574 * matching hashval in the node.
1230 if (probe == max) { 1575 */
1231 blk->index = max-1; 1576 while (probe > 0 &&
1232 blkno = be32_to_cpu(node->btree[max-1].before); 1577 be32_to_cpu(btree[probe].hashval) >= hashval) {
1233 } else { 1578 probe--;
1234 blk->index = probe; 1579 }
1235 blkno = be32_to_cpu(btree->before); 1580 while (probe < max &&
1236 } 1581 be32_to_cpu(btree[probe].hashval) < hashval) {
1237 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1582 probe++;
1238 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1583 }
1239 break; 1584
1240 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { 1585 /*
1241 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); 1586 * Pick the right block to descend on.
1242 break; 1587 */
1588 if (probe == max) {
1589 blk->index = max - 1;
1590 blkno = be32_to_cpu(btree[max - 1].before);
1591 } else {
1592 blk->index = probe;
1593 blkno = be32_to_cpu(btree[probe].before);
1243 } 1594 }
1244 } 1595 }
1245 1596
@@ -1254,7 +1605,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1254 retval = xfs_dir2_leafn_lookup_int(blk->bp, args, 1605 retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1255 &blk->index, state); 1606 &blk->index, state);
1256 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1607 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1257 retval = xfs_attr_leaf_lookup_int(blk->bp, args); 1608 retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1258 blk->index = args->index; 1609 blk->index = args->index;
1259 args->blkno = blk->blkno; 1610 args->blkno = blk->blkno;
1260 } else { 1611 } else {
@@ -1263,7 +1614,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1263 } 1614 }
1264 if (((retval == ENOENT) || (retval == ENOATTR)) && 1615 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1265 (blk->hashval == args->hashval)) { 1616 (blk->hashval == args->hashval)) {
1266 error = xfs_da_path_shift(state, &state->path, 1, 1, 1617 error = xfs_da3_path_shift(state, &state->path, 1, 1,
1267 &retval); 1618 &retval);
1268 if (error) 1619 if (error)
1269 return(error); 1620 return(error);
@@ -1285,16 +1636,52 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1285 *========================================================================*/ 1636 *========================================================================*/
1286 1637
1287/* 1638/*
1639 * Compare two intermediate nodes for "order".
1640 */
1641STATIC int
1642xfs_da3_node_order(
1643 struct xfs_buf *node1_bp,
1644 struct xfs_buf *node2_bp)
1645{
1646 struct xfs_da_intnode *node1;
1647 struct xfs_da_intnode *node2;
1648 struct xfs_da_node_entry *btree1;
1649 struct xfs_da_node_entry *btree2;
1650 struct xfs_da3_icnode_hdr node1hdr;
1651 struct xfs_da3_icnode_hdr node2hdr;
1652
1653 node1 = node1_bp->b_addr;
1654 node2 = node2_bp->b_addr;
1655 xfs_da3_node_hdr_from_disk(&node1hdr, node1);
1656 xfs_da3_node_hdr_from_disk(&node2hdr, node2);
1657 btree1 = xfs_da3_node_tree_p(node1);
1658 btree2 = xfs_da3_node_tree_p(node2);
1659
1660 if (node1hdr.count > 0 && node2hdr.count > 0 &&
1661 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1662 (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1663 be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1664 return 1;
1665 }
1666 return 0;
1667}
1668
1669/*
1288 * Link a new block into a doubly linked list of blocks (of whatever type). 1670 * Link a new block into a doubly linked list of blocks (of whatever type).
1289 */ 1671 */
1290int /* error */ 1672int /* error */
1291xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, 1673xfs_da3_blk_link(
1292 xfs_da_state_blk_t *new_blk) 1674 struct xfs_da_state *state,
1675 struct xfs_da_state_blk *old_blk,
1676 struct xfs_da_state_blk *new_blk)
1293{ 1677{
1294 xfs_da_blkinfo_t *old_info, *new_info, *tmp_info; 1678 struct xfs_da_blkinfo *old_info;
1295 xfs_da_args_t *args; 1679 struct xfs_da_blkinfo *new_info;
1296 int before=0, error; 1680 struct xfs_da_blkinfo *tmp_info;
1297 struct xfs_buf *bp; 1681 struct xfs_da_args *args;
1682 struct xfs_buf *bp;
1683 int before = 0;
1684 int error;
1298 1685
1299 /* 1686 /*
1300 * Set up environment. 1687 * Set up environment.
@@ -1306,9 +1693,6 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1306 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || 1693 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1307 old_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1694 old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1308 old_blk->magic == XFS_ATTR_LEAF_MAGIC); 1695 old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1309 ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1310 ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
1311 ASSERT(old_blk->magic == new_blk->magic);
1312 1696
1313 switch (old_blk->magic) { 1697 switch (old_blk->magic) {
1314 case XFS_ATTR_LEAF_MAGIC: 1698 case XFS_ATTR_LEAF_MAGIC:
@@ -1318,7 +1702,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1318 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); 1702 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1319 break; 1703 break;
1320 case XFS_DA_NODE_MAGIC: 1704 case XFS_DA_NODE_MAGIC:
1321 before = xfs_da_node_order(old_blk->bp, new_blk->bp); 1705 before = xfs_da3_node_order(old_blk->bp, new_blk->bp);
1322 break; 1706 break;
1323 } 1707 }
1324 1708
@@ -1333,14 +1717,14 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1333 new_info->forw = cpu_to_be32(old_blk->blkno); 1717 new_info->forw = cpu_to_be32(old_blk->blkno);
1334 new_info->back = old_info->back; 1718 new_info->back = old_info->back;
1335 if (old_info->back) { 1719 if (old_info->back) {
1336 error = xfs_da_node_read(args->trans, args->dp, 1720 error = xfs_da3_node_read(args->trans, args->dp,
1337 be32_to_cpu(old_info->back), 1721 be32_to_cpu(old_info->back),
1338 -1, &bp, args->whichfork); 1722 -1, &bp, args->whichfork);
1339 if (error) 1723 if (error)
1340 return(error); 1724 return(error);
1341 ASSERT(bp != NULL); 1725 ASSERT(bp != NULL);
1342 tmp_info = bp->b_addr; 1726 tmp_info = bp->b_addr;
1343 ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic)); 1727 ASSERT(tmp_info->magic == old_info->magic);
1344 ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); 1728 ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1345 tmp_info->forw = cpu_to_be32(new_blk->blkno); 1729 tmp_info->forw = cpu_to_be32(new_blk->blkno);
1346 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1730 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
@@ -1354,7 +1738,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1354 new_info->forw = old_info->forw; 1738 new_info->forw = old_info->forw;
1355 new_info->back = cpu_to_be32(old_blk->blkno); 1739 new_info->back = cpu_to_be32(old_blk->blkno);
1356 if (old_info->forw) { 1740 if (old_info->forw) {
1357 error = xfs_da_node_read(args->trans, args->dp, 1741 error = xfs_da3_node_read(args->trans, args->dp,
1358 be32_to_cpu(old_info->forw), 1742 be32_to_cpu(old_info->forw),
1359 -1, &bp, args->whichfork); 1743 -1, &bp, args->whichfork);
1360 if (error) 1744 if (error)
@@ -1375,59 +1759,20 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1375} 1759}
1376 1760
1377/* 1761/*
1378 * Compare two intermediate nodes for "order".
1379 */
1380STATIC int
1381xfs_da_node_order(
1382 struct xfs_buf *node1_bp,
1383 struct xfs_buf *node2_bp)
1384{
1385 xfs_da_intnode_t *node1, *node2;
1386
1387 node1 = node1_bp->b_addr;
1388 node2 = node2_bp->b_addr;
1389 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
1390 node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1391 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1392 ((be32_to_cpu(node2->btree[0].hashval) <
1393 be32_to_cpu(node1->btree[0].hashval)) ||
1394 (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
1395 be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
1396 return(1);
1397 }
1398 return(0);
1399}
1400
1401/*
1402 * Pick up the last hashvalue from an intermediate node.
1403 */
1404STATIC uint
1405xfs_da_node_lasthash(
1406 struct xfs_buf *bp,
1407 int *count)
1408{
1409 xfs_da_intnode_t *node;
1410
1411 node = bp->b_addr;
1412 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1413 if (count)
1414 *count = be16_to_cpu(node->hdr.count);
1415 if (!node->hdr.count)
1416 return(0);
1417 return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1418}
1419
1420/*
1421 * Unlink a block from a doubly linked list of blocks. 1762 * Unlink a block from a doubly linked list of blocks.
1422 */ 1763 */
1423STATIC int /* error */ 1764STATIC int /* error */
1424xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 1765xfs_da3_blk_unlink(
1425 xfs_da_state_blk_t *save_blk) 1766 struct xfs_da_state *state,
1767 struct xfs_da_state_blk *drop_blk,
1768 struct xfs_da_state_blk *save_blk)
1426{ 1769{
1427 xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info; 1770 struct xfs_da_blkinfo *drop_info;
1428 xfs_da_args_t *args; 1771 struct xfs_da_blkinfo *save_info;
1429 struct xfs_buf *bp; 1772 struct xfs_da_blkinfo *tmp_info;
1430 int error; 1773 struct xfs_da_args *args;
1774 struct xfs_buf *bp;
1775 int error;
1431 1776
1432 /* 1777 /*
1433 * Set up environment. 1778 * Set up environment.
@@ -1439,8 +1784,6 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1439 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || 1784 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1440 save_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1785 save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1441 save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1786 save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1442 ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1443 ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
1444 ASSERT(save_blk->magic == drop_blk->magic); 1787 ASSERT(save_blk->magic == drop_blk->magic);
1445 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || 1788 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1446 (be32_to_cpu(save_info->back) == drop_blk->blkno)); 1789 (be32_to_cpu(save_info->back) == drop_blk->blkno));
@@ -1454,7 +1797,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1454 trace_xfs_da_unlink_back(args); 1797 trace_xfs_da_unlink_back(args);
1455 save_info->back = drop_info->back; 1798 save_info->back = drop_info->back;
1456 if (drop_info->back) { 1799 if (drop_info->back) {
1457 error = xfs_da_node_read(args->trans, args->dp, 1800 error = xfs_da3_node_read(args->trans, args->dp,
1458 be32_to_cpu(drop_info->back), 1801 be32_to_cpu(drop_info->back),
1459 -1, &bp, args->whichfork); 1802 -1, &bp, args->whichfork);
1460 if (error) 1803 if (error)
@@ -1471,7 +1814,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1471 trace_xfs_da_unlink_forward(args); 1814 trace_xfs_da_unlink_forward(args);
1472 save_info->forw = drop_info->forw; 1815 save_info->forw = drop_info->forw;
1473 if (drop_info->forw) { 1816 if (drop_info->forw) {
1474 error = xfs_da_node_read(args->trans, args->dp, 1817 error = xfs_da3_node_read(args->trans, args->dp,
1475 be32_to_cpu(drop_info->forw), 1818 be32_to_cpu(drop_info->forw),
1476 -1, &bp, args->whichfork); 1819 -1, &bp, args->whichfork);
1477 if (error) 1820 if (error)
@@ -1499,15 +1842,22 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1499 * the new bottom and the root. 1842 * the new bottom and the root.
1500 */ 1843 */
1501int /* error */ 1844int /* error */
1502xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, 1845xfs_da3_path_shift(
1503 int forward, int release, int *result) 1846 struct xfs_da_state *state,
1847 struct xfs_da_state_path *path,
1848 int forward,
1849 int release,
1850 int *result)
1504{ 1851{
1505 xfs_da_state_blk_t *blk; 1852 struct xfs_da_state_blk *blk;
1506 xfs_da_blkinfo_t *info; 1853 struct xfs_da_blkinfo *info;
1507 xfs_da_intnode_t *node; 1854 struct xfs_da_intnode *node;
1508 xfs_da_args_t *args; 1855 struct xfs_da_args *args;
1509 xfs_dablk_t blkno=0; 1856 struct xfs_da_node_entry *btree;
1510 int level, error; 1857 struct xfs_da3_icnode_hdr nodehdr;
1858 xfs_dablk_t blkno = 0;
1859 int level;
1860 int error;
1511 1861
1512 trace_xfs_da_path_shift(state->args); 1862 trace_xfs_da_path_shift(state->args);
1513 1863
@@ -1522,16 +1872,17 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1522 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1872 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1523 level = (path->active-1) - 1; /* skip bottom layer in path */ 1873 level = (path->active-1) - 1; /* skip bottom layer in path */
1524 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1874 for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1525 ASSERT(blk->bp != NULL);
1526 node = blk->bp->b_addr; 1875 node = blk->bp->b_addr;
1527 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1876 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1528 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { 1877 btree = xfs_da3_node_tree_p(node);
1878
1879 if (forward && (blk->index < nodehdr.count - 1)) {
1529 blk->index++; 1880 blk->index++;
1530 blkno = be32_to_cpu(node->btree[blk->index].before); 1881 blkno = be32_to_cpu(btree[blk->index].before);
1531 break; 1882 break;
1532 } else if (!forward && (blk->index > 0)) { 1883 } else if (!forward && (blk->index > 0)) {
1533 blk->index--; 1884 blk->index--;
1534 blkno = be32_to_cpu(node->btree[blk->index].before); 1885 blkno = be32_to_cpu(btree[blk->index].before);
1535 break; 1886 break;
1536 } 1887 }
1537 } 1888 }
@@ -1557,45 +1908,60 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1557 * Read the next child block. 1908 * Read the next child block.
1558 */ 1909 */
1559 blk->blkno = blkno; 1910 blk->blkno = blkno;
1560 error = xfs_da_node_read(args->trans, args->dp, blkno, -1, 1911 error = xfs_da3_node_read(args->trans, args->dp, blkno, -1,
1561 &blk->bp, args->whichfork); 1912 &blk->bp, args->whichfork);
1562 if (error) 1913 if (error)
1563 return(error); 1914 return(error);
1564 ASSERT(blk->bp != NULL);
1565 info = blk->bp->b_addr; 1915 info = blk->bp->b_addr;
1566 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 1916 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1917 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
1567 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1918 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1568 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1919 info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1569 blk->magic = be16_to_cpu(info->magic); 1920 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1570 if (blk->magic == XFS_DA_NODE_MAGIC) { 1921 info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1922
1923
1924 /*
1925 * Note: we flatten the magic number to a single type so we
1926 * don't have to compare against crc/non-crc types elsewhere.
1927 */
1928 switch (be16_to_cpu(info->magic)) {
1929 case XFS_DA_NODE_MAGIC:
1930 case XFS_DA3_NODE_MAGIC:
1931 blk->magic = XFS_DA_NODE_MAGIC;
1571 node = (xfs_da_intnode_t *)info; 1932 node = (xfs_da_intnode_t *)info;
1572 blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1933 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1934 btree = xfs_da3_node_tree_p(node);
1935 blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1573 if (forward) 1936 if (forward)
1574 blk->index = 0; 1937 blk->index = 0;
1575 else 1938 else
1576 blk->index = be16_to_cpu(node->hdr.count)-1; 1939 blk->index = nodehdr.count - 1;
1577 blkno = be32_to_cpu(node->btree[blk->index].before); 1940 blkno = be32_to_cpu(btree[blk->index].before);
1578 } else { 1941 break;
1942 case XFS_ATTR_LEAF_MAGIC:
1943 case XFS_ATTR3_LEAF_MAGIC:
1944 blk->magic = XFS_ATTR_LEAF_MAGIC;
1579 ASSERT(level == path->active-1); 1945 ASSERT(level == path->active-1);
1580 blk->index = 0; 1946 blk->index = 0;
1581 switch(blk->magic) { 1947 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1582 case XFS_ATTR_LEAF_MAGIC: 1948 NULL);
1583 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, 1949 break;
1584 NULL); 1950 case XFS_DIR2_LEAFN_MAGIC:
1585 break; 1951 case XFS_DIR3_LEAFN_MAGIC:
1586 case XFS_DIR2_LEAFN_MAGIC: 1952 blk->magic = XFS_DIR2_LEAFN_MAGIC;
1587 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, 1953 ASSERT(level == path->active-1);
1588 NULL); 1954 blk->index = 0;
1589 break; 1955 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1590 default: 1956 NULL);
1591 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC || 1957 break;
1592 blk->magic == XFS_DIR2_LEAFN_MAGIC); 1958 default:
1593 break; 1959 ASSERT(0);
1594 } 1960 break;
1595 } 1961 }
1596 } 1962 }
1597 *result = 0; 1963 *result = 0;
1598 return(0); 1964 return 0;
1599} 1965}
1600 1966
1601 1967
@@ -1782,22 +2148,36 @@ xfs_da_grow_inode(
1782 * a bmap btree split to do that. 2148 * a bmap btree split to do that.
1783 */ 2149 */
1784STATIC int 2150STATIC int
1785xfs_da_swap_lastblock( 2151xfs_da3_swap_lastblock(
1786 xfs_da_args_t *args, 2152 struct xfs_da_args *args,
1787 xfs_dablk_t *dead_blknop, 2153 xfs_dablk_t *dead_blknop,
1788 struct xfs_buf **dead_bufp) 2154 struct xfs_buf **dead_bufp)
1789{ 2155{
1790 xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno; 2156 struct xfs_da_blkinfo *dead_info;
1791 struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf; 2157 struct xfs_da_blkinfo *sib_info;
1792 xfs_fileoff_t lastoff; 2158 struct xfs_da_intnode *par_node;
1793 xfs_inode_t *ip; 2159 struct xfs_da_intnode *dead_node;
1794 xfs_trans_t *tp; 2160 struct xfs_dir2_leaf *dead_leaf2;
1795 xfs_mount_t *mp; 2161 struct xfs_da_node_entry *btree;
1796 int error, w, entno, level, dead_level; 2162 struct xfs_da3_icnode_hdr par_hdr;
1797 xfs_da_blkinfo_t *dead_info, *sib_info; 2163 struct xfs_inode *ip;
1798 xfs_da_intnode_t *par_node, *dead_node; 2164 struct xfs_trans *tp;
1799 xfs_dir2_leaf_t *dead_leaf2; 2165 struct xfs_mount *mp;
1800 xfs_dahash_t dead_hash; 2166 struct xfs_buf *dead_buf;
2167 struct xfs_buf *last_buf;
2168 struct xfs_buf *sib_buf;
2169 struct xfs_buf *par_buf;
2170 xfs_dahash_t dead_hash;
2171 xfs_fileoff_t lastoff;
2172 xfs_dablk_t dead_blkno;
2173 xfs_dablk_t last_blkno;
2174 xfs_dablk_t sib_blkno;
2175 xfs_dablk_t par_blkno;
2176 int error;
2177 int w;
2178 int entno;
2179 int level;
2180 int dead_level;
1801 2181
1802 trace_xfs_da_swap_lastblock(args); 2182 trace_xfs_da_swap_lastblock(args);
1803 2183
@@ -1821,7 +2201,7 @@ xfs_da_swap_lastblock(
1821 * Read the last block in the btree space. 2201 * Read the last block in the btree space.
1822 */ 2202 */
1823 last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; 2203 last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1824 error = xfs_da_node_read(tp, ip, last_blkno, -1, &last_buf, w); 2204 error = xfs_da3_node_read(tp, ip, last_blkno, -1, &last_buf, w);
1825 if (error) 2205 if (error)
1826 return error; 2206 return error;
1827 /* 2207 /*
@@ -1833,22 +2213,31 @@ xfs_da_swap_lastblock(
1833 /* 2213 /*
1834 * Get values from the moved block. 2214 * Get values from the moved block.
1835 */ 2215 */
1836 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { 2216 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2217 dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2218 struct xfs_dir3_icleaf_hdr leafhdr;
2219 struct xfs_dir2_leaf_entry *ents;
2220
1837 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 2221 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2222 xfs_dir3_leaf_hdr_from_disk(&leafhdr, dead_leaf2);
2223 ents = xfs_dir3_leaf_ents_p(dead_leaf2);
1838 dead_level = 0; 2224 dead_level = 0;
1839 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); 2225 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
1840 } else { 2226 } else {
1841 ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 2227 struct xfs_da3_icnode_hdr deadhdr;
2228
1842 dead_node = (xfs_da_intnode_t *)dead_info; 2229 dead_node = (xfs_da_intnode_t *)dead_info;
1843 dead_level = be16_to_cpu(dead_node->hdr.level); 2230 xfs_da3_node_hdr_from_disk(&deadhdr, dead_node);
1844 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); 2231 btree = xfs_da3_node_tree_p(dead_node);
2232 dead_level = deadhdr.level;
2233 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
1845 } 2234 }
1846 sib_buf = par_buf = NULL; 2235 sib_buf = par_buf = NULL;
1847 /* 2236 /*
1848 * If the moved block has a left sibling, fix up the pointers. 2237 * If the moved block has a left sibling, fix up the pointers.
1849 */ 2238 */
1850 if ((sib_blkno = be32_to_cpu(dead_info->back))) { 2239 if ((sib_blkno = be32_to_cpu(dead_info->back))) {
1851 error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); 2240 error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
1852 if (error) 2241 if (error)
1853 goto done; 2242 goto done;
1854 sib_info = sib_buf->b_addr; 2243 sib_info = sib_buf->b_addr;
@@ -1870,7 +2259,7 @@ xfs_da_swap_lastblock(
1870 * If the moved block has a right sibling, fix up the pointers. 2259 * If the moved block has a right sibling, fix up the pointers.
1871 */ 2260 */
1872 if ((sib_blkno = be32_to_cpu(dead_info->forw))) { 2261 if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
1873 error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); 2262 error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
1874 if (error) 2263 if (error)
1875 goto done; 2264 goto done;
1876 sib_info = sib_buf->b_addr; 2265 sib_info = sib_buf->b_addr;
@@ -1894,31 +2283,31 @@ xfs_da_swap_lastblock(
1894 * Walk down the tree looking for the parent of the moved block. 2283 * Walk down the tree looking for the parent of the moved block.
1895 */ 2284 */
1896 for (;;) { 2285 for (;;) {
1897 error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); 2286 error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
1898 if (error) 2287 if (error)
1899 goto done; 2288 goto done;
1900 par_node = par_buf->b_addr; 2289 par_node = par_buf->b_addr;
1901 if (unlikely(par_node->hdr.info.magic != 2290 xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
1902 cpu_to_be16(XFS_DA_NODE_MAGIC) || 2291 if (level >= 0 && level != par_hdr.level + 1) {
1903 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1904 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 2292 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1905 XFS_ERRLEVEL_LOW, mp); 2293 XFS_ERRLEVEL_LOW, mp);
1906 error = XFS_ERROR(EFSCORRUPTED); 2294 error = XFS_ERROR(EFSCORRUPTED);
1907 goto done; 2295 goto done;
1908 } 2296 }
1909 level = be16_to_cpu(par_node->hdr.level); 2297 level = par_hdr.level;
2298 btree = xfs_da3_node_tree_p(par_node);
1910 for (entno = 0; 2299 for (entno = 0;
1911 entno < be16_to_cpu(par_node->hdr.count) && 2300 entno < par_hdr.count &&
1912 be32_to_cpu(par_node->btree[entno].hashval) < dead_hash; 2301 be32_to_cpu(btree[entno].hashval) < dead_hash;
1913 entno++) 2302 entno++)
1914 continue; 2303 continue;
1915 if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) { 2304 if (entno == par_hdr.count) {
1916 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", 2305 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1917 XFS_ERRLEVEL_LOW, mp); 2306 XFS_ERRLEVEL_LOW, mp);
1918 error = XFS_ERROR(EFSCORRUPTED); 2307 error = XFS_ERROR(EFSCORRUPTED);
1919 goto done; 2308 goto done;
1920 } 2309 }
1921 par_blkno = be32_to_cpu(par_node->btree[entno].before); 2310 par_blkno = be32_to_cpu(btree[entno].before);
1922 if (level == dead_level + 1) 2311 if (level == dead_level + 1)
1923 break; 2312 break;
1924 xfs_trans_brelse(tp, par_buf); 2313 xfs_trans_brelse(tp, par_buf);
@@ -1930,13 +2319,13 @@ xfs_da_swap_lastblock(
1930 */ 2319 */
1931 for (;;) { 2320 for (;;) {
1932 for (; 2321 for (;
1933 entno < be16_to_cpu(par_node->hdr.count) && 2322 entno < par_hdr.count &&
1934 be32_to_cpu(par_node->btree[entno].before) != last_blkno; 2323 be32_to_cpu(btree[entno].before) != last_blkno;
1935 entno++) 2324 entno++)
1936 continue; 2325 continue;
1937 if (entno < be16_to_cpu(par_node->hdr.count)) 2326 if (entno < par_hdr.count)
1938 break; 2327 break;
1939 par_blkno = be32_to_cpu(par_node->hdr.info.forw); 2328 par_blkno = par_hdr.forw;
1940 xfs_trans_brelse(tp, par_buf); 2329 xfs_trans_brelse(tp, par_buf);
1941 par_buf = NULL; 2330 par_buf = NULL;
1942 if (unlikely(par_blkno == 0)) { 2331 if (unlikely(par_blkno == 0)) {
@@ -1945,27 +2334,27 @@ xfs_da_swap_lastblock(
1945 error = XFS_ERROR(EFSCORRUPTED); 2334 error = XFS_ERROR(EFSCORRUPTED);
1946 goto done; 2335 goto done;
1947 } 2336 }
1948 error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); 2337 error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
1949 if (error) 2338 if (error)
1950 goto done; 2339 goto done;
1951 par_node = par_buf->b_addr; 2340 par_node = par_buf->b_addr;
1952 if (unlikely( 2341 xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
1953 be16_to_cpu(par_node->hdr.level) != level || 2342 if (par_hdr.level != level) {
1954 par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1955 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 2343 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1956 XFS_ERRLEVEL_LOW, mp); 2344 XFS_ERRLEVEL_LOW, mp);
1957 error = XFS_ERROR(EFSCORRUPTED); 2345 error = XFS_ERROR(EFSCORRUPTED);
1958 goto done; 2346 goto done;
1959 } 2347 }
2348 btree = xfs_da3_node_tree_p(par_node);
1960 entno = 0; 2349 entno = 0;
1961 } 2350 }
1962 /* 2351 /*
1963 * Update the parent entry pointing to the moved block. 2352 * Update the parent entry pointing to the moved block.
1964 */ 2353 */
1965 par_node->btree[entno].before = cpu_to_be32(dead_blkno); 2354 btree[entno].before = cpu_to_be32(dead_blkno);
1966 xfs_trans_log_buf(tp, par_buf, 2355 xfs_trans_log_buf(tp, par_buf,
1967 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, 2356 XFS_DA_LOGRANGE(par_node, &btree[entno].before,
1968 sizeof(par_node->btree[entno].before))); 2357 sizeof(btree[entno].before)));
1969 *dead_blknop = last_blkno; 2358 *dead_blknop = last_blkno;
1970 *dead_bufp = last_buf; 2359 *dead_bufp = last_buf;
1971 return 0; 2360 return 0;
@@ -2007,14 +2396,15 @@ xfs_da_shrink_inode(
2007 * Remove extents. If we get ENOSPC for a dir we have to move 2396 * Remove extents. If we get ENOSPC for a dir we have to move
2008 * the last block to the place we want to kill. 2397 * the last block to the place we want to kill.
2009 */ 2398 */
2010 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 2399 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2011 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2400 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2012 0, args->firstblock, args->flist, 2401 0, args->firstblock, args->flist, &done);
2013 &done)) == ENOSPC) { 2402 if (error == ENOSPC) {
2014 if (w != XFS_DATA_FORK) 2403 if (w != XFS_DATA_FORK)
2015 break; 2404 break;
2016 if ((error = xfs_da_swap_lastblock(args, &dead_blkno, 2405 error = xfs_da3_swap_lastblock(args, &dead_blkno,
2017 &dead_buf))) 2406 &dead_buf);
2407 if (error)
2018 break; 2408 break;
2019 } else { 2409 } else {
2020 break; 2410 break;
@@ -2279,12 +2669,21 @@ xfs_da_read_buf(
2279 magic1 = be32_to_cpu(hdr->magic); 2669 magic1 = be32_to_cpu(hdr->magic);
2280 if (unlikely( 2670 if (unlikely(
2281 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && 2671 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2672 (magic != XFS_DA3_NODE_MAGIC) &&
2282 (magic != XFS_ATTR_LEAF_MAGIC) && 2673 (magic != XFS_ATTR_LEAF_MAGIC) &&
2674 (magic != XFS_ATTR3_LEAF_MAGIC) &&
2283 (magic != XFS_DIR2_LEAF1_MAGIC) && 2675 (magic != XFS_DIR2_LEAF1_MAGIC) &&
2676 (magic != XFS_DIR3_LEAF1_MAGIC) &&
2284 (magic != XFS_DIR2_LEAFN_MAGIC) && 2677 (magic != XFS_DIR2_LEAFN_MAGIC) &&
2678 (magic != XFS_DIR3_LEAFN_MAGIC) &&
2285 (magic1 != XFS_DIR2_BLOCK_MAGIC) && 2679 (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2680 (magic1 != XFS_DIR3_BLOCK_MAGIC) &&
2286 (magic1 != XFS_DIR2_DATA_MAGIC) && 2681 (magic1 != XFS_DIR2_DATA_MAGIC) &&
2287 (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), 2682 (magic1 != XFS_DIR3_DATA_MAGIC) &&
2683 (free->hdr.magic !=
2684 cpu_to_be32(XFS_DIR2_FREE_MAGIC)) &&
2685 (free->hdr.magic !=
2686 cpu_to_be32(XFS_DIR3_FREE_MAGIC)),
2288 mp, XFS_ERRTAG_DA_READ_BUF, 2687 mp, XFS_ERRTAG_DA_READ_BUF,
2289 XFS_RANDOM_DA_READ_BUF))) { 2688 XFS_RANDOM_DA_READ_BUF))) {
2290 trace_xfs_da_btree_corrupt(bp, _RET_IP_); 2689 trace_xfs_da_btree_corrupt(bp, _RET_IP_);
@@ -2342,41 +2741,3 @@ out_free:
2342 return -1; 2741 return -1;
2343 return mappedbno; 2742 return mappedbno;
2344} 2743}
2345
2346kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2347
2348/*
2349 * Allocate a dir-state structure.
2350 * We don't put them on the stack since they're large.
2351 */
2352xfs_da_state_t *
2353xfs_da_state_alloc(void)
2354{
2355 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2356}
2357
2358/*
2359 * Kill the altpath contents of a da-state structure.
2360 */
2361STATIC void
2362xfs_da_state_kill_altpath(xfs_da_state_t *state)
2363{
2364 int i;
2365
2366 for (i = 0; i < state->altpath.active; i++)
2367 state->altpath.blk[i].bp = NULL;
2368 state->altpath.active = 0;
2369}
2370
2371/*
2372 * Free a da-state structure.
2373 */
2374void
2375xfs_da_state_free(xfs_da_state_t *state)
2376{
2377 xfs_da_state_kill_altpath(state);
2378#ifdef DEBUG
2379 memset((char *)state, 0, sizeof(*state));
2380#endif /* DEBUG */
2381 kmem_zone_free(xfs_da_state_zone, state);
2382}
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index ee5170c46ae1..6fb3371c63cf 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -20,7 +21,6 @@
20 21
21struct xfs_bmap_free; 22struct xfs_bmap_free;
22struct xfs_inode; 23struct xfs_inode;
23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25struct zone; 25struct zone;
26 26
@@ -47,6 +47,33 @@ typedef struct xfs_da_blkinfo {
47} xfs_da_blkinfo_t; 47} xfs_da_blkinfo_t;
48 48
49/* 49/*
50 * CRC enabled directory structure types
51 *
52 * The headers change size for the additional verification information, but
53 * otherwise the tree layouts and contents are unchanged. Hence the da btree
54 * code can use the struct xfs_da_blkinfo for manipulating the tree links and
55 * magic numbers without modification for both v2 and v3 nodes.
56 */
57#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
58#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
59#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */
60#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */
61
62struct xfs_da3_blkinfo {
63 /*
64 * the node link manipulation code relies on the fact that the first
65 * element of this structure is the struct xfs_da_blkinfo so it can
66 * ignore the differences in the rest of the structures.
67 */
68 struct xfs_da_blkinfo hdr;
69 __be32 crc; /* CRC of block */
70 __be64 blkno; /* first block of the buffer */
71 __be64 lsn; /* sequence number of last write */
72 uuid_t uuid; /* filesystem we belong to */
73 __be64 owner; /* inode that owns the block */
74};
75
76/*
50 * This is the structure of the root and intermediate nodes in the Btree. 77 * This is the structure of the root and intermediate nodes in the Btree.
51 * The leaf nodes are defined above. 78 * The leaf nodes are defined above.
52 * 79 *
@@ -57,19 +84,76 @@ typedef struct xfs_da_blkinfo {
57 */ 84 */
58#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ 85#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
59 86
87typedef struct xfs_da_node_hdr {
88 struct xfs_da_blkinfo info; /* block type, links, etc. */
89 __be16 __count; /* count of active entries */
90 __be16 __level; /* level above leaves (leaf == 0) */
91} xfs_da_node_hdr_t;
92
93struct xfs_da3_node_hdr {
94 struct xfs_da3_blkinfo info; /* block type, links, etc. */
95 __be16 __count; /* count of active entries */
96 __be16 __level; /* level above leaves (leaf == 0) */
97 __be32 __pad32;
98};
99
100#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc))
101
102typedef struct xfs_da_node_entry {
103 __be32 hashval; /* hash value for this descendant */
104 __be32 before; /* Btree block before this key */
105} xfs_da_node_entry_t;
106
60typedef struct xfs_da_intnode { 107typedef struct xfs_da_intnode {
61 struct xfs_da_node_hdr { /* constant-structure header block */ 108 struct xfs_da_node_hdr hdr;
62 xfs_da_blkinfo_t info; /* block type, links, etc. */ 109 struct xfs_da_node_entry __btree[];
63 __be16 count; /* count of active entries */
64 __be16 level; /* level above leaves (leaf == 0) */
65 } hdr;
66 struct xfs_da_node_entry {
67 __be32 hashval; /* hash value for this descendant */
68 __be32 before; /* Btree block before this key */
69 } btree[1]; /* variable sized array of keys */
70} xfs_da_intnode_t; 110} xfs_da_intnode_t;
71typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; 111
72typedef struct xfs_da_node_entry xfs_da_node_entry_t; 112struct xfs_da3_intnode {
113 struct xfs_da3_node_hdr hdr;
114 struct xfs_da_node_entry __btree[];
115};
116
117/*
118 * In-core version of the node header to abstract the differences in the v2 and
119 * v3 disk format of the headers. Callers need to convert to/from disk format as
120 * appropriate.
121 */
122struct xfs_da3_icnode_hdr {
123 __uint32_t forw;
124 __uint32_t back;
125 __uint16_t magic;
126 __uint16_t count;
127 __uint16_t level;
128};
129
130extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to,
131 struct xfs_da_intnode *from);
132extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to,
133 struct xfs_da3_icnode_hdr *from);
134
135static inline int
136xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
137{
138 if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC))
139 return sizeof(struct xfs_da3_node_hdr);
140 return sizeof(struct xfs_da_node_hdr);
141}
142
143static inline struct xfs_da_node_entry *
144xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
145{
146 if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
147 struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap;
148 return dap3->__btree;
149 }
150 return dap->__btree;
151}
152
153extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to,
154 struct xfs_da_intnode *from);
155extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to,
156 struct xfs_da3_icnode_hdr *from);
73 157
74#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize 158#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
75 159
@@ -191,32 +275,34 @@ struct xfs_nameops {
191/* 275/*
192 * Routines used for growing the Btree. 276 * Routines used for growing the Btree.
193 */ 277 */
194int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, 278int xfs_da3_node_create(struct xfs_da_args *args, xfs_dablk_t blkno,
195 struct xfs_buf **bpp, int whichfork); 279 int level, struct xfs_buf **bpp, int whichfork);
196int xfs_da_split(xfs_da_state_t *state); 280int xfs_da3_split(xfs_da_state_t *state);
197 281
198/* 282/*
199 * Routines used for shrinking the Btree. 283 * Routines used for shrinking the Btree.
200 */ 284 */
201int xfs_da_join(xfs_da_state_t *state); 285int xfs_da3_join(xfs_da_state_t *state);
202void xfs_da_fixhashpath(xfs_da_state_t *state, 286void xfs_da3_fixhashpath(struct xfs_da_state *state,
203 xfs_da_state_path_t *path_to_to_fix); 287 struct xfs_da_state_path *path_to_to_fix);
204 288
205/* 289/*
206 * Routines used for finding things in the Btree. 290 * Routines used for finding things in the Btree.
207 */ 291 */
208int xfs_da_node_lookup_int(xfs_da_state_t *state, int *result); 292int xfs_da3_node_lookup_int(xfs_da_state_t *state, int *result);
209int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, 293int xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
210 int forward, int release, int *result); 294 int forward, int release, int *result);
211/* 295/*
212 * Utility routines. 296 * Utility routines.
213 */ 297 */
214int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, 298int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
215 xfs_da_state_blk_t *new_blk); 299 xfs_da_state_blk_t *new_blk);
216int xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp, 300int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
217 xfs_dablk_t bno, xfs_daddr_t mappedbno, 301 xfs_dablk_t bno, xfs_daddr_t mappedbno,
218 struct xfs_buf **bpp, int which_fork); 302 struct xfs_buf **bpp, int which_fork);
219 303
304extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
305
220/* 306/*
221 * Utility routines. 307 * Utility routines.
222 */ 308 */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 1d9643b3dce6..f7a0e95d197a 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -19,7 +19,7 @@
19#define __XFS_DINODE_H__ 19#define __XFS_DINODE_H__
20 20
21#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ 21#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
22#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2)) 22#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
23 23
24typedef struct xfs_timestamp { 24typedef struct xfs_timestamp {
25 __be32 t_sec; /* timestamp seconds */ 25 __be32 t_sec; /* timestamp seconds */
@@ -70,11 +70,36 @@ typedef struct xfs_dinode {
70 70
71 /* di_next_unlinked is the only non-core field in the old dinode */ 71 /* di_next_unlinked is the only non-core field in the old dinode */
72 __be32 di_next_unlinked;/* agi unlinked list ptr */ 72 __be32 di_next_unlinked;/* agi unlinked list ptr */
73} __attribute__((packed)) xfs_dinode_t; 73
74 /* start of the extended dinode, writable fields */
75 __le32 di_crc; /* CRC of the inode */
76 __be64 di_changecount; /* number of attribute changes */
77 __be64 di_lsn; /* flush sequence */
78 __be64 di_flags2; /* more random flags */
79 __u8 di_pad2[16]; /* more padding for future expansion */
80
81 /* fields only written to during inode creation */
82 xfs_timestamp_t di_crtime; /* time created */
83 __be64 di_ino; /* inode number */
84 uuid_t di_uuid; /* UUID of the filesystem */
85
86 /* structure must be padded to 64 bit alignment */
87} xfs_dinode_t;
74 88
75#define DI_MAX_FLUSH 0xffff 89#define DI_MAX_FLUSH 0xffff
76 90
77/* 91/*
92 * Size of the core inode on disk. Version 1 and 2 inodes have
93 * the same size, but version 3 has grown a few additional fields.
94 */
95static inline uint xfs_dinode_size(int version)
96{
97 if (version == 3)
98 return sizeof(struct xfs_dinode);
99 return offsetof(struct xfs_dinode, di_crc);
100}
101
102/*
78 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. 103 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
79 * Since the pathconf interface is signed, we use 2^31 - 1 instead. 104 * Since the pathconf interface is signed, we use 2^31 - 1 instead.
80 * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. 105 * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
@@ -104,11 +129,11 @@ typedef enum xfs_dinode_fmt {
104/* 129/*
105 * Inode size for given fs. 130 * Inode size for given fs.
106 */ 131 */
107#define XFS_LITINO(mp) \ 132#define XFS_LITINO(mp, version) \
108 ((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode))) 133 ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))
109 134
110#define XFS_BROOT_SIZE_ADJ \ 135#define XFS_BROOT_SIZE_ADJ(ip) \
111 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) 136 (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
112 137
113/* 138/*
114 * Inode data & attribute fork sizes, per inode. 139 * Inode data & attribute fork sizes, per inode.
@@ -119,10 +144,10 @@ typedef enum xfs_dinode_fmt {
119#define XFS_DFORK_DSIZE(dip,mp) \ 144#define XFS_DFORK_DSIZE(dip,mp) \
120 (XFS_DFORK_Q(dip) ? \ 145 (XFS_DFORK_Q(dip) ? \
121 XFS_DFORK_BOFF(dip) : \ 146 XFS_DFORK_BOFF(dip) : \
122 XFS_LITINO(mp)) 147 XFS_LITINO(mp, (dip)->di_version))
123#define XFS_DFORK_ASIZE(dip,mp) \ 148#define XFS_DFORK_ASIZE(dip,mp) \
124 (XFS_DFORK_Q(dip) ? \ 149 (XFS_DFORK_Q(dip) ? \
125 XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \ 150 XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \
126 0) 151 0)
127#define XFS_DFORK_SIZE(dip,mp,w) \ 152#define XFS_DFORK_SIZE(dip,mp,w) \
128 ((w) == XFS_DATA_FORK ? \ 153 ((w) == XFS_DATA_FORK ? \
@@ -133,7 +158,7 @@ typedef enum xfs_dinode_fmt {
133 * Return pointers to the data or attribute forks. 158 * Return pointers to the data or attribute forks.
134 */ 159 */
135#define XFS_DFORK_DPTR(dip) \ 160#define XFS_DFORK_DPTR(dip) \
136 ((char *)(dip) + sizeof(struct xfs_dinode)) 161 ((char *)dip + xfs_dinode_size(dip->di_version))
137#define XFS_DFORK_APTR(dip) \ 162#define XFS_DFORK_APTR(dip) \
138 (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) 163 (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
139#define XFS_DFORK_PTR(dip,w) \ 164#define XFS_DFORK_PTR(dip,w) \
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 12afe07a91d7..e59f5fc816fe 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -28,11 +29,13 @@
28#include "xfs_dinode.h" 29#include "xfs_dinode.h"
29#include "xfs_inode.h" 30#include "xfs_inode.h"
30#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_buf_item.h"
31#include "xfs_dir2.h" 33#include "xfs_dir2.h"
32#include "xfs_dir2_format.h" 34#include "xfs_dir2_format.h"
33#include "xfs_dir2_priv.h" 35#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 36#include "xfs_error.h"
35#include "xfs_trace.h" 37#include "xfs_trace.h"
38#include "xfs_cksum.h"
36 39
37/* 40/*
38 * Local function prototypes. 41 * Local function prototypes.
@@ -56,52 +59,110 @@ xfs_dir_startup(void)
56 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); 59 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
57} 60}
58 61
59static void 62static bool
60xfs_dir2_block_verify( 63xfs_dir3_block_verify(
61 struct xfs_buf *bp) 64 struct xfs_buf *bp)
62{ 65{
63 struct xfs_mount *mp = bp->b_target->bt_mount; 66 struct xfs_mount *mp = bp->b_target->bt_mount;
64 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 67 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
65 int block_ok = 0; 68
66 69 if (xfs_sb_version_hascrc(&mp->m_sb)) {
67 block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 70 if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
68 block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; 71 return false;
69 72 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
70 if (!block_ok) { 73 return false;
71 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 74 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
72 xfs_buf_ioerror(bp, EFSCORRUPTED); 75 return false;
76 } else {
77 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
78 return false;
73 } 79 }
80 if (__xfs_dir3_data_check(NULL, bp))
81 return false;
82 return true;
74} 83}
75 84
76static void 85static void
77xfs_dir2_block_read_verify( 86xfs_dir3_block_read_verify(
78 struct xfs_buf *bp) 87 struct xfs_buf *bp)
79{ 88{
80 xfs_dir2_block_verify(bp); 89 struct xfs_mount *mp = bp->b_target->bt_mount;
90
91 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
92 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
93 XFS_DIR3_DATA_CRC_OFF)) ||
94 !xfs_dir3_block_verify(bp)) {
95 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
96 xfs_buf_ioerror(bp, EFSCORRUPTED);
97 }
81} 98}
82 99
83static void 100static void
84xfs_dir2_block_write_verify( 101xfs_dir3_block_write_verify(
85 struct xfs_buf *bp) 102 struct xfs_buf *bp)
86{ 103{
87 xfs_dir2_block_verify(bp); 104 struct xfs_mount *mp = bp->b_target->bt_mount;
105 struct xfs_buf_log_item *bip = bp->b_fspriv;
106 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
107
108 if (!xfs_dir3_block_verify(bp)) {
109 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
110 xfs_buf_ioerror(bp, EFSCORRUPTED);
111 return;
112 }
113
114 if (!xfs_sb_version_hascrc(&mp->m_sb))
115 return;
116
117 if (bip)
118 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
119
120 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
88} 121}
89 122
90const struct xfs_buf_ops xfs_dir2_block_buf_ops = { 123const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
91 .verify_read = xfs_dir2_block_read_verify, 124 .verify_read = xfs_dir3_block_read_verify,
92 .verify_write = xfs_dir2_block_write_verify, 125 .verify_write = xfs_dir3_block_write_verify,
93}; 126};
94 127
95static int 128static int
96xfs_dir2_block_read( 129xfs_dir3_block_read(
97 struct xfs_trans *tp, 130 struct xfs_trans *tp,
98 struct xfs_inode *dp, 131 struct xfs_inode *dp,
99 struct xfs_buf **bpp) 132 struct xfs_buf **bpp)
100{ 133{
101 struct xfs_mount *mp = dp->i_mount; 134 struct xfs_mount *mp = dp->i_mount;
135 int err;
102 136
103 return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, 137 err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
104 XFS_DATA_FORK, &xfs_dir2_block_buf_ops); 138 XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
139 if (!err && tp)
140 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
141 return err;
142}
143
144static void
145xfs_dir3_block_init(
146 struct xfs_mount *mp,
147 struct xfs_trans *tp,
148 struct xfs_buf *bp,
149 struct xfs_inode *dp)
150{
151 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
152
153 bp->b_ops = &xfs_dir3_block_buf_ops;
154 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
155
156 if (xfs_sb_version_hascrc(&mp->m_sb)) {
157 memset(hdr3, 0, sizeof(*hdr3));
158 hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
159 hdr3->blkno = cpu_to_be64(bp->b_bn);
160 hdr3->owner = cpu_to_be64(dp->i_ino);
161 uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
162 return;
163
164 }
165 hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
105} 166}
106 167
107static void 168static void
@@ -121,7 +182,7 @@ xfs_dir2_block_need_space(
121 struct xfs_dir2_data_unused *enddup = NULL; 182 struct xfs_dir2_data_unused *enddup = NULL;
122 183
123 *compact = 0; 184 *compact = 0;
124 bf = hdr->bestfree; 185 bf = xfs_dir3_data_bestfree_p(hdr);
125 186
126 /* 187 /*
127 * If there are stale entries we'll use one for the leaf. 188 * If there are stale entries we'll use one for the leaf.
@@ -303,7 +364,7 @@ xfs_dir2_block_addname(
303 mp = dp->i_mount; 364 mp = dp->i_mount;
304 365
305 /* Read the (one and only) directory block into bp. */ 366 /* Read the (one and only) directory block into bp. */
306 error = xfs_dir2_block_read(tp, dp, &bp); 367 error = xfs_dir3_block_read(tp, dp, &bp);
307 if (error) 368 if (error)
308 return error; 369 return error;
309 370
@@ -498,7 +559,7 @@ xfs_dir2_block_addname(
498 xfs_dir2_data_log_header(tp, bp); 559 xfs_dir2_data_log_header(tp, bp);
499 xfs_dir2_block_log_tail(tp, bp); 560 xfs_dir2_block_log_tail(tp, bp);
500 xfs_dir2_data_log_entry(tp, bp, dep); 561 xfs_dir2_data_log_entry(tp, bp, dep);
501 xfs_dir2_data_check(dp, bp); 562 xfs_dir3_data_check(dp, bp);
502 return 0; 563 return 0;
503} 564}
504 565
@@ -531,7 +592,7 @@ xfs_dir2_block_getdents(
531 if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) 592 if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk)
532 return 0; 593 return 0;
533 594
534 error = xfs_dir2_block_read(NULL, dp, &bp); 595 error = xfs_dir3_block_read(NULL, dp, &bp);
535 if (error) 596 if (error)
536 return error; 597 return error;
537 598
@@ -541,12 +602,12 @@ xfs_dir2_block_getdents(
541 */ 602 */
542 wantoff = xfs_dir2_dataptr_to_off(mp, *offset); 603 wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
543 hdr = bp->b_addr; 604 hdr = bp->b_addr;
544 xfs_dir2_data_check(dp, bp); 605 xfs_dir3_data_check(dp, bp);
545 /* 606 /*
546 * Set up values for the loop. 607 * Set up values for the loop.
547 */ 608 */
548 btp = xfs_dir2_block_tail_p(mp, hdr); 609 btp = xfs_dir2_block_tail_p(mp, hdr);
549 ptr = (char *)(hdr + 1); 610 ptr = (char *)xfs_dir3_data_entry_p(hdr);
550 endptr = (char *)xfs_dir2_block_leaf_p(btp); 611 endptr = (char *)xfs_dir2_block_leaf_p(btp);
551 612
552 /* 613 /*
@@ -665,7 +726,7 @@ xfs_dir2_block_lookup(
665 dp = args->dp; 726 dp = args->dp;
666 mp = dp->i_mount; 727 mp = dp->i_mount;
667 hdr = bp->b_addr; 728 hdr = bp->b_addr;
668 xfs_dir2_data_check(dp, bp); 729 xfs_dir3_data_check(dp, bp);
669 btp = xfs_dir2_block_tail_p(mp, hdr); 730 btp = xfs_dir2_block_tail_p(mp, hdr);
670 blp = xfs_dir2_block_leaf_p(btp); 731 blp = xfs_dir2_block_leaf_p(btp);
671 /* 732 /*
@@ -711,12 +772,12 @@ xfs_dir2_block_lookup_int(
711 tp = args->trans; 772 tp = args->trans;
712 mp = dp->i_mount; 773 mp = dp->i_mount;
713 774
714 error = xfs_dir2_block_read(tp, dp, &bp); 775 error = xfs_dir3_block_read(tp, dp, &bp);
715 if (error) 776 if (error)
716 return error; 777 return error;
717 778
718 hdr = bp->b_addr; 779 hdr = bp->b_addr;
719 xfs_dir2_data_check(dp, bp); 780 xfs_dir3_data_check(dp, bp);
720 btp = xfs_dir2_block_tail_p(mp, hdr); 781 btp = xfs_dir2_block_tail_p(mp, hdr);
721 blp = xfs_dir2_block_leaf_p(btp); 782 blp = xfs_dir2_block_leaf_p(btp);
722 /* 783 /*
@@ -853,7 +914,7 @@ xfs_dir2_block_removename(
853 xfs_dir2_data_freescan(mp, hdr, &needlog); 914 xfs_dir2_data_freescan(mp, hdr, &needlog);
854 if (needlog) 915 if (needlog)
855 xfs_dir2_data_log_header(tp, bp); 916 xfs_dir2_data_log_header(tp, bp);
856 xfs_dir2_data_check(dp, bp); 917 xfs_dir3_data_check(dp, bp);
857 /* 918 /*
858 * See if the size as a shortform is good enough. 919 * See if the size as a shortform is good enough.
859 */ 920 */
@@ -910,7 +971,7 @@ xfs_dir2_block_replace(
910 */ 971 */
911 dep->inumber = cpu_to_be64(args->inumber); 972 dep->inumber = cpu_to_be64(args->inumber);
912 xfs_dir2_data_log_entry(args->trans, bp, dep); 973 xfs_dir2_data_log_entry(args->trans, bp, dep);
913 xfs_dir2_data_check(dp, bp); 974 xfs_dir3_data_check(dp, bp);
914 return 0; 975 return 0;
915} 976}
916 977
@@ -958,6 +1019,8 @@ xfs_dir2_leaf_to_block(
958 __be16 *tagp; /* end of entry (tag) */ 1019 __be16 *tagp; /* end of entry (tag) */
959 int to; /* block/leaf to index */ 1020 int to; /* block/leaf to index */
960 xfs_trans_t *tp; /* transaction pointer */ 1021 xfs_trans_t *tp; /* transaction pointer */
1022 struct xfs_dir2_leaf_entry *ents;
1023 struct xfs_dir3_icleaf_hdr leafhdr;
961 1024
962 trace_xfs_dir2_leaf_to_block(args); 1025 trace_xfs_dir2_leaf_to_block(args);
963 1026
@@ -965,8 +1028,12 @@ xfs_dir2_leaf_to_block(
965 tp = args->trans; 1028 tp = args->trans;
966 mp = dp->i_mount; 1029 mp = dp->i_mount;
967 leaf = lbp->b_addr; 1030 leaf = lbp->b_addr;
968 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 1031 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1032 ents = xfs_dir3_leaf_ents_p(leaf);
969 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1033 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1034
1035 ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
1036 leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
970 /* 1037 /*
971 * If there are data blocks other than the first one, take this 1038 * If there are data blocks other than the first one, take this
972 * opportunity to remove trailing empty data blocks that may have 1039 * opportunity to remove trailing empty data blocks that may have
@@ -974,9 +1041,12 @@ xfs_dir2_leaf_to_block(
974 * These will show up in the leaf bests table. 1041 * These will show up in the leaf bests table.
975 */ 1042 */
976 while (dp->i_d.di_size > mp->m_dirblksize) { 1043 while (dp->i_d.di_size > mp->m_dirblksize) {
1044 int hdrsz;
1045
1046 hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
977 bestsp = xfs_dir2_leaf_bests_p(ltp); 1047 bestsp = xfs_dir2_leaf_bests_p(ltp);
978 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == 1048 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
979 mp->m_dirblksize - (uint)sizeof(*hdr)) { 1049 mp->m_dirblksize - hdrsz) {
980 if ((error = 1050 if ((error =
981 xfs_dir2_leaf_trim_data(args, lbp, 1051 xfs_dir2_leaf_trim_data(args, lbp,
982 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) 1052 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -988,17 +1058,19 @@ xfs_dir2_leaf_to_block(
988 * Read the data block if we don't already have it, give up if it fails. 1058 * Read the data block if we don't already have it, give up if it fails.
989 */ 1059 */
990 if (!dbp) { 1060 if (!dbp) {
991 error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); 1061 error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
992 if (error) 1062 if (error)
993 return error; 1063 return error;
994 } 1064 }
995 hdr = dbp->b_addr; 1065 hdr = dbp->b_addr;
996 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); 1066 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
1067 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1068
997 /* 1069 /*
998 * Size of the "leaf" area in the block. 1070 * Size of the "leaf" area in the block.
999 */ 1071 */
1000 size = (uint)sizeof(xfs_dir2_block_tail_t) + 1072 size = (uint)sizeof(xfs_dir2_block_tail_t) +
1001 (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 1073 (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale);
1002 /* 1074 /*
1003 * Look at the last data entry. 1075 * Look at the last data entry.
1004 */ 1076 */
@@ -1014,8 +1086,8 @@ xfs_dir2_leaf_to_block(
1014 /* 1086 /*
1015 * Start converting it to block form. 1087 * Start converting it to block form.
1016 */ 1088 */
1017 dbp->b_ops = &xfs_dir2_block_buf_ops; 1089 xfs_dir3_block_init(mp, tp, dbp, dp);
1018 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 1090
1019 needlog = 1; 1091 needlog = 1;
1020 needscan = 0; 1092 needscan = 0;
1021 /* 1093 /*
@@ -1027,18 +1099,17 @@ xfs_dir2_leaf_to_block(
1027 * Initialize the block tail. 1099 * Initialize the block tail.
1028 */ 1100 */
1029 btp = xfs_dir2_block_tail_p(mp, hdr); 1101 btp = xfs_dir2_block_tail_p(mp, hdr);
1030 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 1102 btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
1031 btp->stale = 0; 1103 btp->stale = 0;
1032 xfs_dir2_block_log_tail(tp, dbp); 1104 xfs_dir2_block_log_tail(tp, dbp);
1033 /* 1105 /*
1034 * Initialize the block leaf area. We compact out stale entries. 1106 * Initialize the block leaf area. We compact out stale entries.
1035 */ 1107 */
1036 lep = xfs_dir2_block_leaf_p(btp); 1108 lep = xfs_dir2_block_leaf_p(btp);
1037 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 1109 for (from = to = 0; from < leafhdr.count; from++) {
1038 if (leaf->ents[from].address == 1110 if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
1039 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
1040 continue; 1111 continue;
1041 lep[to++] = leaf->ents[from]; 1112 lep[to++] = ents[from];
1042 } 1113 }
1043 ASSERT(to == be32_to_cpu(btp->count)); 1114 ASSERT(to == be32_to_cpu(btp->count));
1044 xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); 1115 xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
@@ -1137,16 +1208,16 @@ xfs_dir2_sf_to_block(
1137 return error; 1208 return error;
1138 } 1209 }
1139 /* 1210 /*
1140 * Initialize the data block. 1211 * Initialize the data block, then convert it to block format.
1141 */ 1212 */
1142 error = xfs_dir2_data_init(args, blkno, &bp); 1213 error = xfs_dir3_data_init(args, blkno, &bp);
1143 if (error) { 1214 if (error) {
1144 kmem_free(sfp); 1215 kmem_free(sfp);
1145 return error; 1216 return error;
1146 } 1217 }
1147 bp->b_ops = &xfs_dir2_block_buf_ops; 1218 xfs_dir3_block_init(mp, tp, bp, dp);
1148 hdr = bp->b_addr; 1219 hdr = bp->b_addr;
1149 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 1220
1150 /* 1221 /*
1151 * Compute size of block "tail" area. 1222 * Compute size of block "tail" area.
1152 */ 1223 */
@@ -1156,7 +1227,7 @@ xfs_dir2_sf_to_block(
1156 * The whole thing is initialized to free by the init routine. 1227 * The whole thing is initialized to free by the init routine.
1157 * Say we're using the leaf and tail area. 1228 * Say we're using the leaf and tail area.
1158 */ 1229 */
1159 dup = (xfs_dir2_data_unused_t *)(hdr + 1); 1230 dup = xfs_dir3_data_unused_p(hdr);
1160 needlog = needscan = 0; 1231 needlog = needscan = 0;
1161 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, 1232 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
1162 &needscan); 1233 &needscan);
@@ -1178,8 +1249,7 @@ xfs_dir2_sf_to_block(
1178 /* 1249 /*
1179 * Create entry for . 1250 * Create entry for .
1180 */ 1251 */
1181 dep = (xfs_dir2_data_entry_t *) 1252 dep = xfs_dir3_data_dot_entry_p(hdr);
1182 ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
1183 dep->inumber = cpu_to_be64(dp->i_ino); 1253 dep->inumber = cpu_to_be64(dp->i_ino);
1184 dep->namelen = 1; 1254 dep->namelen = 1;
1185 dep->name[0] = '.'; 1255 dep->name[0] = '.';
@@ -1192,8 +1262,7 @@ xfs_dir2_sf_to_block(
1192 /* 1262 /*
1193 * Create entry for .. 1263 * Create entry for ..
1194 */ 1264 */
1195 dep = (xfs_dir2_data_entry_t *) 1265 dep = xfs_dir3_data_dotdot_entry_p(hdr);
1196 ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
1197 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); 1266 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
1198 dep->namelen = 2; 1267 dep->namelen = 2;
1199 dep->name[0] = dep->name[1] = '.'; 1268 dep->name[0] = dep->name[1] = '.';
@@ -1203,7 +1272,7 @@ xfs_dir2_sf_to_block(
1203 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1272 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1204 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1273 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1205 (char *)dep - (char *)hdr)); 1274 (char *)dep - (char *)hdr));
1206 offset = XFS_DIR2_DATA_FIRST_OFFSET; 1275 offset = xfs_dir3_data_first_offset(hdr);
1207 /* 1276 /*
1208 * Loop over existing entries, stuff them in. 1277 * Loop over existing entries, stuff them in.
1209 */ 1278 */
@@ -1273,6 +1342,6 @@ xfs_dir2_sf_to_block(
1273 ASSERT(needscan == 0); 1342 ASSERT(needscan == 0);
1274 xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); 1343 xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
1275 xfs_dir2_block_log_tail(tp, bp); 1344 xfs_dir2_block_log_tail(tp, bp);
1276 xfs_dir2_data_check(dp, bp); 1345 xfs_dir3_data_check(dp, bp);
1277 return 0; 1346 return 0;
1278} 1347}
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index ffcf1774152e..c2930238005c 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -30,6 +31,8 @@
30#include "xfs_dir2_format.h" 31#include "xfs_dir2_format.h"
31#include "xfs_dir2_priv.h" 32#include "xfs_dir2_priv.h"
32#include "xfs_error.h" 33#include "xfs_error.h"
34#include "xfs_buf_item.h"
35#include "xfs_cksum.h"
33 36
34STATIC xfs_dir2_data_free_t * 37STATIC xfs_dir2_data_free_t *
35xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); 38xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
@@ -40,7 +43,7 @@ xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
40 * Return 0 is the buffer is good, otherwise an error. 43 * Return 0 is the buffer is good, otherwise an error.
41 */ 44 */
42int 45int
43__xfs_dir2_data_check( 46__xfs_dir3_data_check(
44 struct xfs_inode *dp, /* incore inode pointer */ 47 struct xfs_inode *dp, /* incore inode pointer */
45 struct xfs_buf *bp) /* data block's buffer */ 48 struct xfs_buf *bp) /* data block's buffer */
46{ 49{
@@ -65,15 +68,17 @@ __xfs_dir2_data_check(
65 68
66 mp = bp->b_target->bt_mount; 69 mp = bp->b_target->bt_mount;
67 hdr = bp->b_addr; 70 hdr = bp->b_addr;
68 bf = hdr->bestfree; 71 bf = xfs_dir3_data_bestfree_p(hdr);
69 p = (char *)(hdr + 1); 72 p = (char *)xfs_dir3_data_entry_p(hdr);
70 73
71 switch (hdr->magic) { 74 switch (hdr->magic) {
75 case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
72 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 76 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
73 btp = xfs_dir2_block_tail_p(mp, hdr); 77 btp = xfs_dir2_block_tail_p(mp, hdr);
74 lep = xfs_dir2_block_leaf_p(btp); 78 lep = xfs_dir2_block_leaf_p(btp);
75 endp = (char *)lep; 79 endp = (char *)lep;
76 break; 80 break;
81 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
77 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 82 case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
78 endp = (char *)hdr + mp->m_dirblksize; 83 endp = (char *)hdr + mp->m_dirblksize;
79 break; 84 break;
@@ -148,7 +153,8 @@ __xfs_dir2_data_check(
148 (char *)dep - (char *)hdr); 153 (char *)dep - (char *)hdr);
149 count++; 154 count++;
150 lastfree = 0; 155 lastfree = 0;
151 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { 156 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
157 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
152 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 158 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
153 (xfs_dir2_data_aoff_t) 159 (xfs_dir2_data_aoff_t)
154 ((char *)dep - (char *)hdr)); 160 ((char *)dep - (char *)hdr));
@@ -168,7 +174,8 @@ __xfs_dir2_data_check(
168 * Need to have seen all the entries and all the bestfree slots. 174 * Need to have seen all the entries and all the bestfree slots.
169 */ 175 */
170 XFS_WANT_CORRUPTED_RETURN(freeseen == 7); 176 XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
171 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { 177 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
178 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
172 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 179 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
173 if (lep[i].address == 180 if (lep[i].address ==
174 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 181 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
@@ -185,21 +192,27 @@ __xfs_dir2_data_check(
185 return 0; 192 return 0;
186} 193}
187 194
188static void 195static bool
189xfs_dir2_data_verify( 196xfs_dir3_data_verify(
190 struct xfs_buf *bp) 197 struct xfs_buf *bp)
191{ 198{
192 struct xfs_mount *mp = bp->b_target->bt_mount; 199 struct xfs_mount *mp = bp->b_target->bt_mount;
193 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 200 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
194 int block_ok = 0;
195 201
196 block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC); 202 if (xfs_sb_version_hascrc(&mp->m_sb)) {
197 block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; 203 if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
198 204 return false;
199 if (!block_ok) { 205 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
200 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 206 return false;
201 xfs_buf_ioerror(bp, EFSCORRUPTED); 207 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
208 return false;
209 } else {
210 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
211 return false;
202 } 212 }
213 if (__xfs_dir3_data_check(NULL, bp))
214 return false;
215 return true;
203} 216}
204 217
205/* 218/*
@@ -208,7 +221,7 @@ xfs_dir2_data_verify(
208 * format buffer or a data format buffer on readahead. 221 * format buffer or a data format buffer on readahead.
209 */ 222 */
210static void 223static void
211xfs_dir2_data_reada_verify( 224xfs_dir3_data_reada_verify(
212 struct xfs_buf *bp) 225 struct xfs_buf *bp)
213{ 226{
214 struct xfs_mount *mp = bp->b_target->bt_mount; 227 struct xfs_mount *mp = bp->b_target->bt_mount;
@@ -216,11 +229,13 @@ xfs_dir2_data_reada_verify(
216 229
217 switch (hdr->magic) { 230 switch (hdr->magic) {
218 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 231 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
219 bp->b_ops = &xfs_dir2_block_buf_ops; 232 case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
233 bp->b_ops = &xfs_dir3_block_buf_ops;
220 bp->b_ops->verify_read(bp); 234 bp->b_ops->verify_read(bp);
221 return; 235 return;
222 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 236 case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
223 xfs_dir2_data_verify(bp); 237 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
238 xfs_dir3_data_verify(bp);
224 return; 239 return;
225 default: 240 default:
226 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 241 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
@@ -230,51 +245,80 @@ xfs_dir2_data_reada_verify(
230} 245}
231 246
232static void 247static void
233xfs_dir2_data_read_verify( 248xfs_dir3_data_read_verify(
234 struct xfs_buf *bp) 249 struct xfs_buf *bp)
235{ 250{
236 xfs_dir2_data_verify(bp); 251 struct xfs_mount *mp = bp->b_target->bt_mount;
252
253 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
254 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
255 XFS_DIR3_DATA_CRC_OFF)) ||
256 !xfs_dir3_data_verify(bp)) {
257 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
258 xfs_buf_ioerror(bp, EFSCORRUPTED);
259 }
237} 260}
238 261
239static void 262static void
240xfs_dir2_data_write_verify( 263xfs_dir3_data_write_verify(
241 struct xfs_buf *bp) 264 struct xfs_buf *bp)
242{ 265{
243 xfs_dir2_data_verify(bp); 266 struct xfs_mount *mp = bp->b_target->bt_mount;
267 struct xfs_buf_log_item *bip = bp->b_fspriv;
268 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
269
270 if (!xfs_dir3_data_verify(bp)) {
271 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
272 xfs_buf_ioerror(bp, EFSCORRUPTED);
273 return;
274 }
275
276 if (!xfs_sb_version_hascrc(&mp->m_sb))
277 return;
278
279 if (bip)
280 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
281
282 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
244} 283}
245 284
246const struct xfs_buf_ops xfs_dir2_data_buf_ops = { 285const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
247 .verify_read = xfs_dir2_data_read_verify, 286 .verify_read = xfs_dir3_data_read_verify,
248 .verify_write = xfs_dir2_data_write_verify, 287 .verify_write = xfs_dir3_data_write_verify,
249}; 288};
250 289
251static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = { 290static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
252 .verify_read = xfs_dir2_data_reada_verify, 291 .verify_read = xfs_dir3_data_reada_verify,
253 .verify_write = xfs_dir2_data_write_verify, 292 .verify_write = xfs_dir3_data_write_verify,
254}; 293};
255 294
256 295
257int 296int
258xfs_dir2_data_read( 297xfs_dir3_data_read(
259 struct xfs_trans *tp, 298 struct xfs_trans *tp,
260 struct xfs_inode *dp, 299 struct xfs_inode *dp,
261 xfs_dablk_t bno, 300 xfs_dablk_t bno,
262 xfs_daddr_t mapped_bno, 301 xfs_daddr_t mapped_bno,
263 struct xfs_buf **bpp) 302 struct xfs_buf **bpp)
264{ 303{
265 return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, 304 int err;
266 XFS_DATA_FORK, &xfs_dir2_data_buf_ops); 305
306 err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
307 XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
308 if (!err && tp)
309 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
310 return err;
267} 311}
268 312
269int 313int
270xfs_dir2_data_readahead( 314xfs_dir3_data_readahead(
271 struct xfs_trans *tp, 315 struct xfs_trans *tp,
272 struct xfs_inode *dp, 316 struct xfs_inode *dp,
273 xfs_dablk_t bno, 317 xfs_dablk_t bno,
274 xfs_daddr_t mapped_bno) 318 xfs_daddr_t mapped_bno)
275{ 319{
276 return xfs_da_reada_buf(tp, dp, bno, mapped_bno, 320 return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
277 XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops); 321 XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
278} 322}
279 323
280/* 324/*
@@ -288,12 +332,15 @@ xfs_dir2_data_freefind(
288{ 332{
289 xfs_dir2_data_free_t *dfp; /* bestfree entry */ 333 xfs_dir2_data_free_t *dfp; /* bestfree entry */
290 xfs_dir2_data_aoff_t off; /* offset value needed */ 334 xfs_dir2_data_aoff_t off; /* offset value needed */
335 struct xfs_dir2_data_free *bf;
291#if defined(DEBUG) && defined(__KERNEL__) 336#if defined(DEBUG) && defined(__KERNEL__)
292 int matched; /* matched the value */ 337 int matched; /* matched the value */
293 int seenzero; /* saw a 0 bestfree entry */ 338 int seenzero; /* saw a 0 bestfree entry */
294#endif 339#endif
295 340
296 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); 341 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
342 bf = xfs_dir3_data_bestfree_p(hdr);
343
297#if defined(DEBUG) && defined(__KERNEL__) 344#if defined(DEBUG) && defined(__KERNEL__)
298 /* 345 /*
299 * Validate some consistency in the bestfree table. 346 * Validate some consistency in the bestfree table.
@@ -301,9 +348,11 @@ xfs_dir2_data_freefind(
301 * one we're looking for it has to be exact. 348 * one we're looking for it has to be exact.
302 */ 349 */
303 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 350 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
304 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 351 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
305 for (dfp = &hdr->bestfree[0], seenzero = matched = 0; 352 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
306 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; 353 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
354 for (dfp = &bf[0], seenzero = matched = 0;
355 dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
307 dfp++) { 356 dfp++) {
308 if (!dfp->offset) { 357 if (!dfp->offset) {
309 ASSERT(!dfp->length); 358 ASSERT(!dfp->length);
@@ -319,7 +368,7 @@ xfs_dir2_data_freefind(
319 else 368 else
320 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); 369 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
321 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); 370 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
322 if (dfp > &hdr->bestfree[0]) 371 if (dfp > &bf[0])
323 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); 372 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
324 } 373 }
325#endif 374#endif
@@ -328,14 +377,12 @@ xfs_dir2_data_freefind(
328 * it can't be there since they're sorted. 377 * it can't be there since they're sorted.
329 */ 378 */
330 if (be16_to_cpu(dup->length) < 379 if (be16_to_cpu(dup->length) <
331 be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) 380 be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
332 return NULL; 381 return NULL;
333 /* 382 /*
334 * Look at the three bestfree entries for our guy. 383 * Look at the three bestfree entries for our guy.
335 */ 384 */
336 for (dfp = &hdr->bestfree[0]; 385 for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
337 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
338 dfp++) {
339 if (!dfp->offset) 386 if (!dfp->offset)
340 return NULL; 387 return NULL;
341 if (be16_to_cpu(dfp->offset) == off) 388 if (be16_to_cpu(dfp->offset) == off)
@@ -359,11 +406,12 @@ xfs_dir2_data_freeinsert(
359 xfs_dir2_data_free_t *dfp; /* bestfree table pointer */ 406 xfs_dir2_data_free_t *dfp; /* bestfree table pointer */
360 xfs_dir2_data_free_t new; /* new bestfree entry */ 407 xfs_dir2_data_free_t new; /* new bestfree entry */
361 408
362#ifdef __KERNEL__
363 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 409 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
364 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 410 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
365#endif 411 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
366 dfp = hdr->bestfree; 412 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
413
414 dfp = xfs_dir3_data_bestfree_p(hdr);
367 new.length = dup->length; 415 new.length = dup->length;
368 new.offset = cpu_to_be16((char *)dup - (char *)hdr); 416 new.offset = cpu_to_be16((char *)dup - (char *)hdr);
369 417
@@ -400,32 +448,36 @@ xfs_dir2_data_freeremove(
400 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ 448 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
401 int *loghead) /* out: log data header */ 449 int *loghead) /* out: log data header */
402{ 450{
403#ifdef __KERNEL__ 451 struct xfs_dir2_data_free *bf;
452
404 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 453 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
405 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 454 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
406#endif 455 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
456 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
457
407 /* 458 /*
408 * It's the first entry, slide the next 2 up. 459 * It's the first entry, slide the next 2 up.
409 */ 460 */
410 if (dfp == &hdr->bestfree[0]) { 461 bf = xfs_dir3_data_bestfree_p(hdr);
411 hdr->bestfree[0] = hdr->bestfree[1]; 462 if (dfp == &bf[0]) {
412 hdr->bestfree[1] = hdr->bestfree[2]; 463 bf[0] = bf[1];
464 bf[1] = bf[2];
413 } 465 }
414 /* 466 /*
415 * It's the second entry, slide the 3rd entry up. 467 * It's the second entry, slide the 3rd entry up.
416 */ 468 */
417 else if (dfp == &hdr->bestfree[1]) 469 else if (dfp == &bf[1])
418 hdr->bestfree[1] = hdr->bestfree[2]; 470 bf[1] = bf[2];
419 /* 471 /*
420 * Must be the last entry. 472 * Must be the last entry.
421 */ 473 */
422 else 474 else
423 ASSERT(dfp == &hdr->bestfree[2]); 475 ASSERT(dfp == &bf[2]);
424 /* 476 /*
425 * Clear the 3rd entry, must be zero now. 477 * Clear the 3rd entry, must be zero now.
426 */ 478 */
427 hdr->bestfree[2].length = 0; 479 bf[2].length = 0;
428 hdr->bestfree[2].offset = 0; 480 bf[2].offset = 0;
429 *loghead = 1; 481 *loghead = 1;
430} 482}
431 483
@@ -441,23 +493,27 @@ xfs_dir2_data_freescan(
441 xfs_dir2_block_tail_t *btp; /* block tail */ 493 xfs_dir2_block_tail_t *btp; /* block tail */
442 xfs_dir2_data_entry_t *dep; /* active data entry */ 494 xfs_dir2_data_entry_t *dep; /* active data entry */
443 xfs_dir2_data_unused_t *dup; /* unused data entry */ 495 xfs_dir2_data_unused_t *dup; /* unused data entry */
496 struct xfs_dir2_data_free *bf;
444 char *endp; /* end of block's data */ 497 char *endp; /* end of block's data */
445 char *p; /* current entry pointer */ 498 char *p; /* current entry pointer */
446 499
447#ifdef __KERNEL__
448 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 500 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
449 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 501 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
450#endif 502 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
503 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
504
451 /* 505 /*
452 * Start by clearing the table. 506 * Start by clearing the table.
453 */ 507 */
454 memset(hdr->bestfree, 0, sizeof(hdr->bestfree)); 508 bf = xfs_dir3_data_bestfree_p(hdr);
509 memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
455 *loghead = 1; 510 *loghead = 1;
456 /* 511 /*
457 * Set up pointers. 512 * Set up pointers.
458 */ 513 */
459 p = (char *)(hdr + 1); 514 p = (char *)xfs_dir3_data_entry_p(hdr);
460 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { 515 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
516 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
461 btp = xfs_dir2_block_tail_p(mp, hdr); 517 btp = xfs_dir2_block_tail_p(mp, hdr);
462 endp = (char *)xfs_dir2_block_leaf_p(btp); 518 endp = (char *)xfs_dir2_block_leaf_p(btp);
463 } else 519 } else
@@ -493,7 +549,7 @@ xfs_dir2_data_freescan(
493 * Give back the buffer for the created block. 549 * Give back the buffer for the created block.
494 */ 550 */
495int /* error */ 551int /* error */
496xfs_dir2_data_init( 552xfs_dir3_data_init(
497 xfs_da_args_t *args, /* directory operation args */ 553 xfs_da_args_t *args, /* directory operation args */
498 xfs_dir2_db_t blkno, /* logical dir block number */ 554 xfs_dir2_db_t blkno, /* logical dir block number */
499 struct xfs_buf **bpp) /* output block buffer */ 555 struct xfs_buf **bpp) /* output block buffer */
@@ -502,6 +558,7 @@ xfs_dir2_data_init(
502 xfs_dir2_data_hdr_t *hdr; /* data block header */ 558 xfs_dir2_data_hdr_t *hdr; /* data block header */
503 xfs_inode_t *dp; /* incore directory inode */ 559 xfs_inode_t *dp; /* incore directory inode */
504 xfs_dir2_data_unused_t *dup; /* unused entry pointer */ 560 xfs_dir2_data_unused_t *dup; /* unused entry pointer */
561 struct xfs_dir2_data_free *bf;
505 int error; /* error return value */ 562 int error; /* error return value */
506 int i; /* bestfree index */ 563 int i; /* bestfree index */
507 xfs_mount_t *mp; /* filesystem mount point */ 564 xfs_mount_t *mp; /* filesystem mount point */
@@ -518,27 +575,40 @@ xfs_dir2_data_init(
518 XFS_DATA_FORK); 575 XFS_DATA_FORK);
519 if (error) 576 if (error)
520 return error; 577 return error;
521 bp->b_ops = &xfs_dir2_data_buf_ops; 578 bp->b_ops = &xfs_dir3_data_buf_ops;
579 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
522 580
523 /* 581 /*
524 * Initialize the header. 582 * Initialize the header.
525 */ 583 */
526 hdr = bp->b_addr; 584 hdr = bp->b_addr;
527 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 585 if (xfs_sb_version_hascrc(&mp->m_sb)) {
528 hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); 586 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
587
588 memset(hdr3, 0, sizeof(*hdr3));
589 hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
590 hdr3->blkno = cpu_to_be64(bp->b_bn);
591 hdr3->owner = cpu_to_be64(dp->i_ino);
592 uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
593
594 } else
595 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
596
597 bf = xfs_dir3_data_bestfree_p(hdr);
598 bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr));
529 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { 599 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
530 hdr->bestfree[i].length = 0; 600 bf[i].length = 0;
531 hdr->bestfree[i].offset = 0; 601 bf[i].offset = 0;
532 } 602 }
533 603
534 /* 604 /*
535 * Set up an unused entry for the block's body. 605 * Set up an unused entry for the block's body.
536 */ 606 */
537 dup = (xfs_dir2_data_unused_t *)(hdr + 1); 607 dup = xfs_dir3_data_unused_p(hdr);
538 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 608 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
539 609
540 t = mp->m_dirblksize - (uint)sizeof(*hdr); 610 t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr);
541 hdr->bestfree[0].length = cpu_to_be16(t); 611 bf[0].length = cpu_to_be16(t);
542 dup->length = cpu_to_be16(t); 612 dup->length = cpu_to_be16(t);
543 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); 613 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
544 /* 614 /*
@@ -562,7 +632,9 @@ xfs_dir2_data_log_entry(
562 xfs_dir2_data_hdr_t *hdr = bp->b_addr; 632 xfs_dir2_data_hdr_t *hdr = bp->b_addr;
563 633
564 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 634 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
565 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 635 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
636 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
637 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
566 638
567 xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), 639 xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
568 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - 640 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
@@ -580,9 +652,11 @@ xfs_dir2_data_log_header(
580 xfs_dir2_data_hdr_t *hdr = bp->b_addr; 652 xfs_dir2_data_hdr_t *hdr = bp->b_addr;
581 653
582 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 654 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
583 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 655 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
656 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
657 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
584 658
585 xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1); 659 xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1);
586} 660}
587 661
588/* 662/*
@@ -597,7 +671,9 @@ xfs_dir2_data_log_unused(
597 xfs_dir2_data_hdr_t *hdr = bp->b_addr; 671 xfs_dir2_data_hdr_t *hdr = bp->b_addr;
598 672
599 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 673 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
600 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 674 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
675 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
676 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
601 677
602 /* 678 /*
603 * Log the first part of the unused entry. 679 * Log the first part of the unused entry.
@@ -635,6 +711,7 @@ xfs_dir2_data_make_free(
635 xfs_dir2_data_unused_t *newdup; /* new unused entry */ 711 xfs_dir2_data_unused_t *newdup; /* new unused entry */
636 xfs_dir2_data_unused_t *postdup; /* unused entry after us */ 712 xfs_dir2_data_unused_t *postdup; /* unused entry after us */
637 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ 713 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
714 struct xfs_dir2_data_free *bf;
638 715
639 mp = tp->t_mountp; 716 mp = tp->t_mountp;
640 hdr = bp->b_addr; 717 hdr = bp->b_addr;
@@ -642,12 +719,14 @@ xfs_dir2_data_make_free(
642 /* 719 /*
643 * Figure out where the end of the data area is. 720 * Figure out where the end of the data area is.
644 */ 721 */
645 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 722 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
723 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
646 endptr = (char *)hdr + mp->m_dirblksize; 724 endptr = (char *)hdr + mp->m_dirblksize;
647 else { 725 else {
648 xfs_dir2_block_tail_t *btp; /* block tail */ 726 xfs_dir2_block_tail_t *btp; /* block tail */
649 727
650 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 728 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
729 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
651 btp = xfs_dir2_block_tail_p(mp, hdr); 730 btp = xfs_dir2_block_tail_p(mp, hdr);
652 endptr = (char *)xfs_dir2_block_leaf_p(btp); 731 endptr = (char *)xfs_dir2_block_leaf_p(btp);
653 } 732 }
@@ -655,7 +734,7 @@ xfs_dir2_data_make_free(
655 * If this isn't the start of the block, then back up to 734 * If this isn't the start of the block, then back up to
656 * the previous entry and see if it's free. 735 * the previous entry and see if it's free.
657 */ 736 */
658 if (offset > sizeof(*hdr)) { 737 if (offset > xfs_dir3_data_entry_offset(hdr)) {
659 __be16 *tagp; /* tag just before us */ 738 __be16 *tagp; /* tag just before us */
660 739
661 tagp = (__be16 *)((char *)hdr + offset) - 1; 740 tagp = (__be16 *)((char *)hdr + offset) - 1;
@@ -681,6 +760,7 @@ xfs_dir2_data_make_free(
681 * Previous and following entries are both free, 760 * Previous and following entries are both free,
682 * merge everything into a single free entry. 761 * merge everything into a single free entry.
683 */ 762 */
763 bf = xfs_dir3_data_bestfree_p(hdr);
684 if (prevdup && postdup) { 764 if (prevdup && postdup) {
685 xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ 765 xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */
686 766
@@ -695,7 +775,7 @@ xfs_dir2_data_make_free(
695 * since the third bestfree is there, there might be more 775 * since the third bestfree is there, there might be more
696 * entries. 776 * entries.
697 */ 777 */
698 needscan = (hdr->bestfree[2].length != 0); 778 needscan = (bf[2].length != 0);
699 /* 779 /*
700 * Fix up the new big freespace. 780 * Fix up the new big freespace.
701 */ 781 */
@@ -711,10 +791,10 @@ xfs_dir2_data_make_free(
711 * Remove entry 1 first then entry 0. 791 * Remove entry 1 first then entry 0.
712 */ 792 */
713 ASSERT(dfp && dfp2); 793 ASSERT(dfp && dfp2);
714 if (dfp == &hdr->bestfree[1]) { 794 if (dfp == &bf[1]) {
715 dfp = &hdr->bestfree[0]; 795 dfp = &bf[0];
716 ASSERT(dfp2 == dfp); 796 ASSERT(dfp2 == dfp);
717 dfp2 = &hdr->bestfree[1]; 797 dfp2 = &bf[1];
718 } 798 }
719 xfs_dir2_data_freeremove(hdr, dfp2, needlogp); 799 xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
720 xfs_dir2_data_freeremove(hdr, dfp, needlogp); 800 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
@@ -722,7 +802,7 @@ xfs_dir2_data_make_free(
722 * Now insert the new entry. 802 * Now insert the new entry.
723 */ 803 */
724 dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); 804 dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
725 ASSERT(dfp == &hdr->bestfree[0]); 805 ASSERT(dfp == &bf[0]);
726 ASSERT(dfp->length == prevdup->length); 806 ASSERT(dfp->length == prevdup->length);
727 ASSERT(!dfp[1].length); 807 ASSERT(!dfp[1].length);
728 ASSERT(!dfp[2].length); 808 ASSERT(!dfp[2].length);
@@ -751,7 +831,7 @@ xfs_dir2_data_make_free(
751 */ 831 */
752 else { 832 else {
753 needscan = be16_to_cpu(prevdup->length) > 833 needscan = be16_to_cpu(prevdup->length) >
754 be16_to_cpu(hdr->bestfree[2].length); 834 be16_to_cpu(bf[2].length);
755 } 835 }
756 } 836 }
757 /* 837 /*
@@ -779,7 +859,7 @@ xfs_dir2_data_make_free(
779 */ 859 */
780 else { 860 else {
781 needscan = be16_to_cpu(newdup->length) > 861 needscan = be16_to_cpu(newdup->length) >
782 be16_to_cpu(hdr->bestfree[2].length); 862 be16_to_cpu(bf[2].length);
783 } 863 }
784 } 864 }
785 /* 865 /*
@@ -818,10 +898,13 @@ xfs_dir2_data_use_free(
818 xfs_dir2_data_unused_t *newdup; /* new unused entry */ 898 xfs_dir2_data_unused_t *newdup; /* new unused entry */
819 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ 899 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
820 int oldlen; /* old unused entry's length */ 900 int oldlen; /* old unused entry's length */
901 struct xfs_dir2_data_free *bf;
821 902
822 hdr = bp->b_addr; 903 hdr = bp->b_addr;
823 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 904 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
824 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 905 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
906 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
907 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
825 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); 908 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
826 ASSERT(offset >= (char *)dup - (char *)hdr); 909 ASSERT(offset >= (char *)dup - (char *)hdr);
827 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); 910 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
@@ -831,7 +914,8 @@ xfs_dir2_data_use_free(
831 */ 914 */
832 dfp = xfs_dir2_data_freefind(hdr, dup); 915 dfp = xfs_dir2_data_freefind(hdr, dup);
833 oldlen = be16_to_cpu(dup->length); 916 oldlen = be16_to_cpu(dup->length);
834 ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length)); 917 bf = xfs_dir3_data_bestfree_p(hdr);
918 ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
835 /* 919 /*
836 * Check for alignment with front and back of the entry. 920 * Check for alignment with front and back of the entry.
837 */ 921 */
@@ -845,7 +929,7 @@ xfs_dir2_data_use_free(
845 */ 929 */
846 if (matchfront && matchback) { 930 if (matchfront && matchback) {
847 if (dfp) { 931 if (dfp) {
848 needscan = (hdr->bestfree[2].offset != 0); 932 needscan = (bf[2].offset != 0);
849 if (!needscan) 933 if (!needscan)
850 xfs_dir2_data_freeremove(hdr, dfp, needlogp); 934 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
851 } 935 }
@@ -875,7 +959,7 @@ xfs_dir2_data_use_free(
875 * that means we don't know if there was a better 959 * that means we don't know if there was a better
876 * choice for the last slot, or not. Rescan. 960 * choice for the last slot, or not. Rescan.
877 */ 961 */
878 needscan = dfp == &hdr->bestfree[2]; 962 needscan = dfp == &bf[2];
879 } 963 }
880 } 964 }
881 /* 965 /*
@@ -902,7 +986,7 @@ xfs_dir2_data_use_free(
902 * that means we don't know if there was a better 986 * that means we don't know if there was a better
903 * choice for the last slot, or not. Rescan. 987 * choice for the last slot, or not. Rescan.
904 */ 988 */
905 needscan = dfp == &hdr->bestfree[2]; 989 needscan = dfp == &bf[2];
906 } 990 }
907 } 991 }
908 /* 992 /*
@@ -930,7 +1014,7 @@ xfs_dir2_data_use_free(
930 * the 2 new will work. 1014 * the 2 new will work.
931 */ 1015 */
932 if (dfp) { 1016 if (dfp) {
933 needscan = (hdr->bestfree[2].length != 0); 1017 needscan = (bf[2].length != 0);
934 if (!needscan) { 1018 if (!needscan) {
935 xfs_dir2_data_freeremove(hdr, dfp, needlogp); 1019 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
936 xfs_dir2_data_freeinsert(hdr, newdup, needlogp); 1020 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index 07270981f48f..a3b1bd841a80 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -36,6 +37,38 @@
36#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ 37#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */
37 38
38/* 39/*
40 * Directory Version 3 With CRCs.
41 *
42 * The tree formats are the same as for version 2 directories. The difference
43 * is in the block header and dirent formats. In many cases the v3 structures
44 * use v2 definitions as they are no different and this makes code sharing much
45 * easier.
46 *
47 * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
48 * format is v2 then they switch to the existing v2 code, or the format is v3
49 * they implement the v3 functionality. This means the existing dir2 is a mix of
50 * xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called
51 * where there is a difference in the formats, otherwise the code is unchanged.
52 *
53 * Where it is possible, the code decides what to do based on the magic numbers
54 * in the blocks rather than feature bits in the superblock. This means the code
55 * is as independent of the external XFS code as possible as doesn't require
56 * passing struct xfs_mount pointers into places where it isn't really
57 * necessary.
58 *
59 * Version 3 includes:
60 *
61 * - a larger block header for CRC and identification purposes and so the
62 * offsets of all the structures inside the blocks are different.
63 *
64 * - new magic numbers to be able to detect the v2/v3 types on the fly.
65 */
66
67#define XFS_DIR3_BLOCK_MAGIC 0x58444233 /* XDB3: single block dirs */
68#define XFS_DIR3_DATA_MAGIC 0x58444433 /* XDD3: multiblock dirs */
69#define XFS_DIR3_FREE_MAGIC 0x58444633 /* XDF3: free index blocks */
70
71/*
39 * Byte offset in data block and shortform entry. 72 * Byte offset in data block and shortform entry.
40 */ 73 */
41typedef __uint16_t xfs_dir2_data_off_t; 74typedef __uint16_t xfs_dir2_data_off_t;
@@ -195,16 +228,6 @@ xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
195 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) 228 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
196 229
197/* 230/*
198 * Offsets of . and .. in data space (always block 0)
199 */
200#define XFS_DIR2_DATA_DOT_OFFSET \
201 ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
202#define XFS_DIR2_DATA_DOTDOT_OFFSET \
203 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
204#define XFS_DIR2_DATA_FIRST_OFFSET \
205 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
206
207/*
208 * Describe a free area in the data block. 231 * Describe a free area in the data block.
209 * 232 *
210 * The freespace will be formatted as a xfs_dir2_data_unused_t. 233 * The freespace will be formatted as a xfs_dir2_data_unused_t.
@@ -226,6 +249,39 @@ typedef struct xfs_dir2_data_hdr {
226} xfs_dir2_data_hdr_t; 249} xfs_dir2_data_hdr_t;
227 250
228/* 251/*
252 * define a structure for all the verification fields we are adding to the
253 * directory block structures. This will be used in several structures.
254 * The magic number must be the first entry to align with all the dir2
255 * structures so we determine how to decode them just by the magic number.
256 */
257struct xfs_dir3_blk_hdr {
258 __be32 magic; /* magic number */
259 __be32 crc; /* CRC of block */
260 __be64 blkno; /* first block of the buffer */
261 __be64 lsn; /* sequence number of last write */
262 uuid_t uuid; /* filesystem we belong to */
263 __be64 owner; /* inode that owns the block */
264};
265
266struct xfs_dir3_data_hdr {
267 struct xfs_dir3_blk_hdr hdr;
268 xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT];
269};
270
271#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc)
272
273static inline struct xfs_dir2_data_free *
274xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
275{
276 if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
277 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
278 struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr;
279 return hdr3->best_free;
280 }
281 return hdr->bestfree;
282}
283
284/*
229 * Active entry in a data block. 285 * Active entry in a data block.
230 * 286 *
231 * Aligned to 8 bytes. After the variable length name field there is a 287 * Aligned to 8 bytes. After the variable length name field there is a
@@ -280,6 +336,94 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
280 be16_to_cpu(dup->length) - sizeof(__be16)); 336 be16_to_cpu(dup->length) - sizeof(__be16));
281} 337}
282 338
339static inline size_t
340xfs_dir3_data_hdr_size(bool dir3)
341{
342 if (dir3)
343 return sizeof(struct xfs_dir3_data_hdr);
344 return sizeof(struct xfs_dir2_data_hdr);
345}
346
347static inline size_t
348xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr)
349{
350 bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
351 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
352 return xfs_dir3_data_hdr_size(dir3);
353}
354
355static inline struct xfs_dir2_data_entry *
356xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
357{
358 return (struct xfs_dir2_data_entry *)
359 ((char *)hdr + xfs_dir3_data_entry_offset(hdr));
360}
361
362static inline struct xfs_dir2_data_unused *
363xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
364{
365 return (struct xfs_dir2_data_unused *)
366 ((char *)hdr + xfs_dir3_data_entry_offset(hdr));
367}
368
369/*
370 * Offsets of . and .. in data space (always block 0)
371 *
372 * The macros are used for shortform directories as they have no headers to read
373 * the magic number out of. Shortform directories need to know the size of the
374 * data block header because the sfe embeds the block offset of the entry into
375 * it so that it doesn't change when format conversion occurs. Bad Things Happen
376 * if we don't follow this rule.
377 */
378#define XFS_DIR3_DATA_DOT_OFFSET(mp) \
379 xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
380#define XFS_DIR3_DATA_DOTDOT_OFFSET(mp) \
381 (XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir2_data_entsize(1))
382#define XFS_DIR3_DATA_FIRST_OFFSET(mp) \
383 (XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir2_data_entsize(2))
384
385static inline xfs_dir2_data_aoff_t
386xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
387{
388 return xfs_dir3_data_entry_offset(hdr);
389}
390
391static inline xfs_dir2_data_aoff_t
392xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
393{
394 return xfs_dir3_data_dot_offset(hdr) + xfs_dir2_data_entsize(1);
395}
396
397static inline xfs_dir2_data_aoff_t
398xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
399{
400 return xfs_dir3_data_dotdot_offset(hdr) + xfs_dir2_data_entsize(2);
401}
402
403/*
404 * location of . and .. in data space (always block 0)
405 */
406static inline struct xfs_dir2_data_entry *
407xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr)
408{
409 return (struct xfs_dir2_data_entry *)
410 ((char *)hdr + xfs_dir3_data_dot_offset(hdr));
411}
412
413static inline struct xfs_dir2_data_entry *
414xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr)
415{
416 return (struct xfs_dir2_data_entry *)
417 ((char *)hdr + xfs_dir3_data_dotdot_offset(hdr));
418}
419
420static inline struct xfs_dir2_data_entry *
421xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr)
422{
423 return (struct xfs_dir2_data_entry *)
424 ((char *)hdr + xfs_dir3_data_first_offset(hdr));
425}
426
283/* 427/*
284 * Leaf block structures. 428 * Leaf block structures.
285 * 429 *
@@ -329,6 +473,21 @@ typedef struct xfs_dir2_leaf_hdr {
329 __be16 stale; /* count of stale entries */ 473 __be16 stale; /* count of stale entries */
330} xfs_dir2_leaf_hdr_t; 474} xfs_dir2_leaf_hdr_t;
331 475
476struct xfs_dir3_leaf_hdr {
477 struct xfs_da3_blkinfo info; /* header for da routines */
478 __be16 count; /* count of entries */
479 __be16 stale; /* count of stale entries */
480 __be32 pad;
481};
482
483struct xfs_dir3_icleaf_hdr {
484 __uint32_t forw;
485 __uint32_t back;
486 __uint16_t magic;
487 __uint16_t count;
488 __uint16_t stale;
489};
490
332/* 491/*
333 * Leaf block entry. 492 * Leaf block entry.
334 */ 493 */
@@ -348,23 +507,50 @@ typedef struct xfs_dir2_leaf_tail {
348 * Leaf block. 507 * Leaf block.
349 */ 508 */
350typedef struct xfs_dir2_leaf { 509typedef struct xfs_dir2_leaf {
351 xfs_dir2_leaf_hdr_t hdr; /* leaf header */ 510 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
352 xfs_dir2_leaf_entry_t ents[]; /* entries */ 511 xfs_dir2_leaf_entry_t __ents[]; /* entries */
353} xfs_dir2_leaf_t; 512} xfs_dir2_leaf_t;
354 513
355/* 514struct xfs_dir3_leaf {
356 * DB blocks here are logical directory block numbers, not filesystem blocks. 515 struct xfs_dir3_leaf_hdr hdr; /* leaf header */
357 */ 516 struct xfs_dir2_leaf_entry __ents[]; /* entries */
517};
358 518
359static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) 519#define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc)
520
521static inline int
522xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp)
360{ 523{
361 return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) / 524 if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
525 lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC))
526 return sizeof(struct xfs_dir3_leaf_hdr);
527 return sizeof(struct xfs_dir2_leaf_hdr);
528}
529
530static inline int
531xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
532{
533 return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) /
362 (uint)sizeof(struct xfs_dir2_leaf_entry); 534 (uint)sizeof(struct xfs_dir2_leaf_entry);
363} 535}
364 536
365/* 537/*
366 * Get address of the bestcount field in the single-leaf block. 538 * Get address of the bestcount field in the single-leaf block.
367 */ 539 */
540static inline struct xfs_dir2_leaf_entry *
541xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
542{
543 if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
544 lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
545 struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp;
546 return lp3->__ents;
547 }
548 return lp->__ents;
549}
550
551/*
552 * Get address of the bestcount field in the single-leaf block.
553 */
368static inline struct xfs_dir2_leaf_tail * 554static inline struct xfs_dir2_leaf_tail *
369xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) 555xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
370{ 556{
@@ -383,6 +569,10 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
383} 569}
384 570
385/* 571/*
572 * DB blocks here are logical directory block numbers, not filesystem blocks.
573 */
574
575/*
386 * Convert dataptr to byte in file space 576 * Convert dataptr to byte in file space
387 */ 577 */
388static inline xfs_dir2_off_t 578static inline xfs_dir2_off_t
@@ -520,19 +710,65 @@ typedef struct xfs_dir2_free {
520 /* unused entries are -1 */ 710 /* unused entries are -1 */
521} xfs_dir2_free_t; 711} xfs_dir2_free_t;
522 712
523static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp) 713struct xfs_dir3_free_hdr {
714 struct xfs_dir3_blk_hdr hdr;
715 __be32 firstdb; /* db of first entry */
716 __be32 nvalid; /* count of valid entries */
717 __be32 nused; /* count of used entries */
718};
719
720struct xfs_dir3_free {
721 struct xfs_dir3_free_hdr hdr;
722 __be16 bests[]; /* best free counts */
723 /* unused entries are -1 */
724};
725
726#define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc)
727
728/*
729 * In core version of the free block header, abstracted away from on-disk format
730 * differences. Use this in the code, and convert to/from the disk version using
731 * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
732 */
733struct xfs_dir3_icfree_hdr {
734 __uint32_t magic;
735 __uint32_t firstdb;
736 __uint32_t nvalid;
737 __uint32_t nused;
738
739};
740
741void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to,
742 struct xfs_dir2_free *from);
743
744static inline int
745xfs_dir3_free_hdr_size(struct xfs_mount *mp)
524{ 746{
525 return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / 747 if (xfs_sb_version_hascrc(&mp->m_sb))
748 return sizeof(struct xfs_dir3_free_hdr);
749 return sizeof(struct xfs_dir2_free_hdr);
750}
751
752static inline int
753xfs_dir3_free_max_bests(struct xfs_mount *mp)
754{
755 return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) /
526 sizeof(xfs_dir2_data_off_t); 756 sizeof(xfs_dir2_data_off_t);
527} 757}
528 758
759static inline __be16 *
760xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free)
761{
762 return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp));
763}
764
529/* 765/*
530 * Convert data space db to the corresponding free db. 766 * Convert data space db to the corresponding free db.
531 */ 767 */
532static inline xfs_dir2_db_t 768static inline xfs_dir2_db_t
533xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) 769xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
534{ 770{
535 return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); 771 return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
536} 772}
537 773
538/* 774/*
@@ -541,7 +777,7 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
541static inline int 777static inline int
542xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) 778xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
543{ 779{
544 return db % xfs_dir2_free_max_bests(mp); 780 return db % xfs_dir3_free_max_bests(mp);
545} 781}
546 782
547/* 783/*
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 60cd2fa4e047..721ba2fe8e54 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -33,97 +34,371 @@
33#include "xfs_dir2_priv.h" 34#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_buf_item.h"
38#include "xfs_cksum.h"
36 39
37/* 40/*
38 * Local function declarations. 41 * Local function declarations.
39 */ 42 */
40#ifdef DEBUG
41static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
42#else
43#define xfs_dir2_leaf_check(dp, bp)
44#endif
45static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, 43static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
46 int *indexp, struct xfs_buf **dbpp); 44 int *indexp, struct xfs_buf **dbpp);
47static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, 45static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
48 int first, int last); 46 int first, int last);
49static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); 47static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
50 48
51static void 49/*
52xfs_dir2_leaf_verify( 50 * Check the internal consistency of a leaf1 block.
51 * Pop an assert if something is wrong.
52 */
53#ifdef DEBUG
54#define xfs_dir3_leaf_check(mp, bp) \
55do { \
56 if (!xfs_dir3_leaf1_check((mp), (bp))) \
57 ASSERT(0); \
58} while (0);
59
60STATIC bool
61xfs_dir3_leaf1_check(
62 struct xfs_mount *mp,
63 struct xfs_buf *bp)
64{
65 struct xfs_dir2_leaf *leaf = bp->b_addr;
66 struct xfs_dir3_icleaf_hdr leafhdr;
67
68 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
69
70 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
71 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
72 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
73 return false;
74 } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
75 return false;
76
77 return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
78}
79#else
80#define xfs_dir3_leaf_check(mp, bp)
81#endif
82
83void
84xfs_dir3_leaf_hdr_from_disk(
85 struct xfs_dir3_icleaf_hdr *to,
86 struct xfs_dir2_leaf *from)
87{
88 if (from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
89 from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
90 to->forw = be32_to_cpu(from->hdr.info.forw);
91 to->back = be32_to_cpu(from->hdr.info.back);
92 to->magic = be16_to_cpu(from->hdr.info.magic);
93 to->count = be16_to_cpu(from->hdr.count);
94 to->stale = be16_to_cpu(from->hdr.stale);
95 } else {
96 struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
97
98 to->forw = be32_to_cpu(hdr3->info.hdr.forw);
99 to->back = be32_to_cpu(hdr3->info.hdr.back);
100 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
101 to->count = be16_to_cpu(hdr3->count);
102 to->stale = be16_to_cpu(hdr3->stale);
103 }
104
105 ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
106 to->magic == XFS_DIR3_LEAF1_MAGIC ||
107 to->magic == XFS_DIR2_LEAFN_MAGIC ||
108 to->magic == XFS_DIR3_LEAFN_MAGIC);
109}
110
111void
112xfs_dir3_leaf_hdr_to_disk(
113 struct xfs_dir2_leaf *to,
114 struct xfs_dir3_icleaf_hdr *from)
115{
116 ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
117 from->magic == XFS_DIR3_LEAF1_MAGIC ||
118 from->magic == XFS_DIR2_LEAFN_MAGIC ||
119 from->magic == XFS_DIR3_LEAFN_MAGIC);
120
121 if (from->magic == XFS_DIR2_LEAF1_MAGIC ||
122 from->magic == XFS_DIR2_LEAFN_MAGIC) {
123 to->hdr.info.forw = cpu_to_be32(from->forw);
124 to->hdr.info.back = cpu_to_be32(from->back);
125 to->hdr.info.magic = cpu_to_be16(from->magic);
126 to->hdr.count = cpu_to_be16(from->count);
127 to->hdr.stale = cpu_to_be16(from->stale);
128 } else {
129 struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
130
131 hdr3->info.hdr.forw = cpu_to_be32(from->forw);
132 hdr3->info.hdr.back = cpu_to_be32(from->back);
133 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
134 hdr3->count = cpu_to_be16(from->count);
135 hdr3->stale = cpu_to_be16(from->stale);
136 }
137}
138
139bool
140xfs_dir3_leaf_check_int(
141 struct xfs_mount *mp,
142 struct xfs_dir3_icleaf_hdr *hdr,
143 struct xfs_dir2_leaf *leaf)
144{
145 struct xfs_dir2_leaf_entry *ents;
146 xfs_dir2_leaf_tail_t *ltp;
147 int stale;
148 int i;
149
150 ents = xfs_dir3_leaf_ents_p(leaf);
151 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
152
153 /*
154 * XXX (dgc): This value is not restrictive enough.
155 * Should factor in the size of the bests table as well.
156 * We can deduce a value for that from di_size.
157 */
158 if (hdr->count > xfs_dir3_max_leaf_ents(mp, leaf))
159 return false;
160
161 /* Leaves and bests don't overlap in leaf format. */
162 if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
163 hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
164 (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
165 return false;
166
167 /* Check hash value order, count stale entries. */
168 for (i = stale = 0; i < hdr->count; i++) {
169 if (i + 1 < hdr->count) {
170 if (be32_to_cpu(ents[i].hashval) >
171 be32_to_cpu(ents[i + 1].hashval))
172 return false;
173 }
174 if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
175 stale++;
176 }
177 if (hdr->stale != stale)
178 return false;
179 return true;
180}
181
182static bool
183xfs_dir3_leaf_verify(
53 struct xfs_buf *bp, 184 struct xfs_buf *bp,
54 __be16 magic) 185 __uint16_t magic)
186{
187 struct xfs_mount *mp = bp->b_target->bt_mount;
188 struct xfs_dir2_leaf *leaf = bp->b_addr;
189 struct xfs_dir3_icleaf_hdr leafhdr;
190
191 ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
192
193 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
194 if (xfs_sb_version_hascrc(&mp->m_sb)) {
195 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
196
197 if ((magic == XFS_DIR2_LEAF1_MAGIC &&
198 leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) ||
199 (magic == XFS_DIR2_LEAFN_MAGIC &&
200 leafhdr.magic != XFS_DIR3_LEAFN_MAGIC))
201 return false;
202
203 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
204 return false;
205 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
206 return false;
207 } else {
208 if (leafhdr.magic != magic)
209 return false;
210 }
211 return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
212}
213
214static void
215__read_verify(
216 struct xfs_buf *bp,
217 __uint16_t magic)
218{
219 struct xfs_mount *mp = bp->b_target->bt_mount;
220
221 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
222 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
223 XFS_DIR3_LEAF_CRC_OFF)) ||
224 !xfs_dir3_leaf_verify(bp, magic)) {
225 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
226 xfs_buf_ioerror(bp, EFSCORRUPTED);
227 }
228}
229
230static void
231__write_verify(
232 struct xfs_buf *bp,
233 __uint16_t magic)
55{ 234{
56 struct xfs_mount *mp = bp->b_target->bt_mount; 235 struct xfs_mount *mp = bp->b_target->bt_mount;
57 struct xfs_dir2_leaf_hdr *hdr = bp->b_addr; 236 struct xfs_buf_log_item *bip = bp->b_fspriv;
58 int block_ok = 0; 237 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
59 238
60 block_ok = hdr->info.magic == magic; 239 if (!xfs_dir3_leaf_verify(bp, magic)) {
61 if (!block_ok) { 240 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
62 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
63 xfs_buf_ioerror(bp, EFSCORRUPTED); 241 xfs_buf_ioerror(bp, EFSCORRUPTED);
242 return;
64 } 243 }
244
245 if (!xfs_sb_version_hascrc(&mp->m_sb))
246 return;
247
248 if (bip)
249 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
250
251 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF);
65} 252}
66 253
67static void 254static void
68xfs_dir2_leaf1_read_verify( 255xfs_dir3_leaf1_read_verify(
69 struct xfs_buf *bp) 256 struct xfs_buf *bp)
70{ 257{
71 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 258 __read_verify(bp, XFS_DIR2_LEAF1_MAGIC);
72} 259}
73 260
74static void 261static void
75xfs_dir2_leaf1_write_verify( 262xfs_dir3_leaf1_write_verify(
76 struct xfs_buf *bp) 263 struct xfs_buf *bp)
77{ 264{
78 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 265 __write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
79} 266}
80 267
81void 268static void
82xfs_dir2_leafn_read_verify( 269xfs_dir3_leafn_read_verify(
83 struct xfs_buf *bp) 270 struct xfs_buf *bp)
84{ 271{
85 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 272 __read_verify(bp, XFS_DIR2_LEAFN_MAGIC);
86} 273}
87 274
88void 275static void
89xfs_dir2_leafn_write_verify( 276xfs_dir3_leafn_write_verify(
90 struct xfs_buf *bp) 277 struct xfs_buf *bp)
91{ 278{
92 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 279 __write_verify(bp, XFS_DIR2_LEAFN_MAGIC);
93} 280}
94 281
95static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = { 282const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
96 .verify_read = xfs_dir2_leaf1_read_verify, 283 .verify_read = xfs_dir3_leaf1_read_verify,
97 .verify_write = xfs_dir2_leaf1_write_verify, 284 .verify_write = xfs_dir3_leaf1_write_verify,
98}; 285};
99 286
100const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = { 287const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
101 .verify_read = xfs_dir2_leafn_read_verify, 288 .verify_read = xfs_dir3_leafn_read_verify,
102 .verify_write = xfs_dir2_leafn_write_verify, 289 .verify_write = xfs_dir3_leafn_write_verify,
103}; 290};
104 291
105static int 292static int
106xfs_dir2_leaf_read( 293xfs_dir3_leaf_read(
107 struct xfs_trans *tp, 294 struct xfs_trans *tp,
108 struct xfs_inode *dp, 295 struct xfs_inode *dp,
109 xfs_dablk_t fbno, 296 xfs_dablk_t fbno,
110 xfs_daddr_t mappedbno, 297 xfs_daddr_t mappedbno,
111 struct xfs_buf **bpp) 298 struct xfs_buf **bpp)
112{ 299{
113 return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 300 int err;
114 XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops); 301
302 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
303 XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
304 if (!err && tp)
305 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
306 return err;
115} 307}
116 308
117int 309int
118xfs_dir2_leafn_read( 310xfs_dir3_leafn_read(
119 struct xfs_trans *tp, 311 struct xfs_trans *tp,
120 struct xfs_inode *dp, 312 struct xfs_inode *dp,
121 xfs_dablk_t fbno, 313 xfs_dablk_t fbno,
122 xfs_daddr_t mappedbno, 314 xfs_daddr_t mappedbno,
123 struct xfs_buf **bpp) 315 struct xfs_buf **bpp)
124{ 316{
125 return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 317 int err;
126 XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops); 318
319 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
320 XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
321 if (!err && tp)
322 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
323 return err;
324}
325
326/*
327 * Initialize a new leaf block, leaf1 or leafn magic accepted.
328 */
329static void
330xfs_dir3_leaf_init(
331 struct xfs_mount *mp,
332 struct xfs_trans *tp,
333 struct xfs_buf *bp,
334 xfs_ino_t owner,
335 __uint16_t type)
336{
337 struct xfs_dir2_leaf *leaf = bp->b_addr;
338
339 ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
340
341 if (xfs_sb_version_hascrc(&mp->m_sb)) {
342 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
343
344 memset(leaf3, 0, sizeof(*leaf3));
345
346 leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
347 ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
348 : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
349 leaf3->info.blkno = cpu_to_be64(bp->b_bn);
350 leaf3->info.owner = cpu_to_be64(owner);
351 uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid);
352 } else {
353 memset(leaf, 0, sizeof(*leaf));
354 leaf->hdr.info.magic = cpu_to_be16(type);
355 }
356
357 /*
358 * If it's a leaf-format directory initialize the tail.
359 * Caller is responsible for initialising the bests table.
360 */
361 if (type == XFS_DIR2_LEAF1_MAGIC) {
362 struct xfs_dir2_leaf_tail *ltp;
363
364 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
365 ltp->bestcount = 0;
366 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
367 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
368 } else {
369 bp->b_ops = &xfs_dir3_leafn_buf_ops;
370 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
371 }
372}
373
374int
375xfs_dir3_leaf_get_buf(
376 xfs_da_args_t *args,
377 xfs_dir2_db_t bno,
378 struct xfs_buf **bpp,
379 __uint16_t magic)
380{
381 struct xfs_inode *dp = args->dp;
382 struct xfs_trans *tp = args->trans;
383 struct xfs_mount *mp = dp->i_mount;
384 struct xfs_buf *bp;
385 int error;
386
387 ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
388 ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
389 bno < XFS_DIR2_FREE_FIRSTDB(mp));
390
391 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
392 XFS_DATA_FORK);
393 if (error)
394 return error;
395
396 xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
397 xfs_dir3_leaf_log_header(tp, bp);
398 if (magic == XFS_DIR2_LEAF1_MAGIC)
399 xfs_dir3_leaf_log_tail(tp, bp);
400 *bpp = bp;
401 return 0;
127} 402}
128 403
129/* 404/*
@@ -149,6 +424,9 @@ xfs_dir2_block_to_leaf(
149 int needlog; /* need to log block header */ 424 int needlog; /* need to log block header */
150 int needscan; /* need to rescan bestfree */ 425 int needscan; /* need to rescan bestfree */
151 xfs_trans_t *tp; /* transaction pointer */ 426 xfs_trans_t *tp; /* transaction pointer */
427 struct xfs_dir2_data_free *bf;
428 struct xfs_dir2_leaf_entry *ents;
429 struct xfs_dir3_icleaf_hdr leafhdr;
152 430
153 trace_xfs_dir2_block_to_leaf(args); 431 trace_xfs_dir2_block_to_leaf(args);
154 432
@@ -168,26 +446,33 @@ xfs_dir2_block_to_leaf(
168 /* 446 /*
169 * Initialize the leaf block, get a buffer for it. 447 * Initialize the leaf block, get a buffer for it.
170 */ 448 */
171 if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) { 449 error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
450 if (error)
172 return error; 451 return error;
173 } 452
174 ASSERT(lbp != NULL);
175 leaf = lbp->b_addr; 453 leaf = lbp->b_addr;
176 hdr = dbp->b_addr; 454 hdr = dbp->b_addr;
177 xfs_dir2_data_check(dp, dbp); 455 xfs_dir3_data_check(dp, dbp);
178 btp = xfs_dir2_block_tail_p(mp, hdr); 456 btp = xfs_dir2_block_tail_p(mp, hdr);
179 blp = xfs_dir2_block_leaf_p(btp); 457 blp = xfs_dir2_block_leaf_p(btp);
458 bf = xfs_dir3_data_bestfree_p(hdr);
459 ents = xfs_dir3_leaf_ents_p(leaf);
460
180 /* 461 /*
181 * Set the counts in the leaf header. 462 * Set the counts in the leaf header.
182 */ 463 */
183 leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count)); 464 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
184 leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale)); 465 leafhdr.count = be32_to_cpu(btp->count);
466 leafhdr.stale = be32_to_cpu(btp->stale);
467 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
468 xfs_dir3_leaf_log_header(tp, lbp);
469
185 /* 470 /*
186 * Could compact these but I think we always do the conversion 471 * Could compact these but I think we always do the conversion
187 * after squeezing out stale entries. 472 * after squeezing out stale entries.
188 */ 473 */
189 memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); 474 memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
190 xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1); 475 xfs_dir3_leaf_log_ents(tp, lbp, 0, leafhdr.count - 1);
191 needscan = 0; 476 needscan = 0;
192 needlog = 1; 477 needlog = 1;
193 /* 478 /*
@@ -202,8 +487,13 @@ xfs_dir2_block_to_leaf(
202 /* 487 /*
203 * Fix up the block header, make it a data block. 488 * Fix up the block header, make it a data block.
204 */ 489 */
205 dbp->b_ops = &xfs_dir2_data_buf_ops; 490 dbp->b_ops = &xfs_dir3_data_buf_ops;
206 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 491 xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF);
492 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
493 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
494 else
495 hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
496
207 if (needscan) 497 if (needscan)
208 xfs_dir2_data_freescan(mp, hdr, &needlog); 498 xfs_dir2_data_freescan(mp, hdr, &needlog);
209 /* 499 /*
@@ -212,21 +502,22 @@ xfs_dir2_block_to_leaf(
212 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 502 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
213 ltp->bestcount = cpu_to_be32(1); 503 ltp->bestcount = cpu_to_be32(1);
214 bestsp = xfs_dir2_leaf_bests_p(ltp); 504 bestsp = xfs_dir2_leaf_bests_p(ltp);
215 bestsp[0] = hdr->bestfree[0].length; 505 bestsp[0] = bf[0].length;
216 /* 506 /*
217 * Log the data header and leaf bests table. 507 * Log the data header and leaf bests table.
218 */ 508 */
219 if (needlog) 509 if (needlog)
220 xfs_dir2_data_log_header(tp, dbp); 510 xfs_dir2_data_log_header(tp, dbp);
221 xfs_dir2_leaf_check(dp, lbp); 511 xfs_dir3_leaf_check(mp, lbp);
222 xfs_dir2_data_check(dp, dbp); 512 xfs_dir3_data_check(dp, dbp);
223 xfs_dir2_leaf_log_bests(tp, lbp, 0, 0); 513 xfs_dir3_leaf_log_bests(tp, lbp, 0, 0);
224 return 0; 514 return 0;
225} 515}
226 516
227STATIC void 517STATIC void
228xfs_dir2_leaf_find_stale( 518xfs_dir3_leaf_find_stale(
229 struct xfs_dir2_leaf *leaf, 519 struct xfs_dir3_icleaf_hdr *leafhdr,
520 struct xfs_dir2_leaf_entry *ents,
230 int index, 521 int index,
231 int *lowstale, 522 int *lowstale,
232 int *highstale) 523 int *highstale)
@@ -235,7 +526,7 @@ xfs_dir2_leaf_find_stale(
235 * Find the first stale entry before our index, if any. 526 * Find the first stale entry before our index, if any.
236 */ 527 */
237 for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { 528 for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
238 if (leaf->ents[*lowstale].address == 529 if (ents[*lowstale].address ==
239 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 530 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
240 break; 531 break;
241 } 532 }
@@ -245,10 +536,8 @@ xfs_dir2_leaf_find_stale(
245 * Stop if the result would require moving more entries than using 536 * Stop if the result would require moving more entries than using
246 * lowstale. 537 * lowstale.
247 */ 538 */
248 for (*highstale = index; 539 for (*highstale = index; *highstale < leafhdr->count; ++*highstale) {
249 *highstale < be16_to_cpu(leaf->hdr.count); 540 if (ents[*highstale].address ==
250 ++*highstale) {
251 if (leaf->ents[*highstale].address ==
252 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 541 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
253 break; 542 break;
254 if (*lowstale >= 0 && index - *lowstale <= *highstale - index) 543 if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
@@ -257,8 +546,9 @@ xfs_dir2_leaf_find_stale(
257} 546}
258 547
259struct xfs_dir2_leaf_entry * 548struct xfs_dir2_leaf_entry *
260xfs_dir2_leaf_find_entry( 549xfs_dir3_leaf_find_entry(
261 xfs_dir2_leaf_t *leaf, /* leaf structure */ 550 struct xfs_dir3_icleaf_hdr *leafhdr,
551 struct xfs_dir2_leaf_entry *ents,
262 int index, /* leaf table position */ 552 int index, /* leaf table position */
263 int compact, /* need to compact leaves */ 553 int compact, /* need to compact leaves */
264 int lowstale, /* index of prev stale leaf */ 554 int lowstale, /* index of prev stale leaf */
@@ -266,7 +556,7 @@ xfs_dir2_leaf_find_entry(
266 int *lfloglow, /* low leaf logging index */ 556 int *lfloglow, /* low leaf logging index */
267 int *lfloghigh) /* high leaf logging index */ 557 int *lfloghigh) /* high leaf logging index */
268{ 558{
269 if (!leaf->hdr.stale) { 559 if (!leafhdr->stale) {
270 xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ 560 xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
271 561
272 /* 562 /*
@@ -274,18 +564,16 @@ xfs_dir2_leaf_find_entry(
274 * 564 *
275 * If there are no stale entries, just insert a hole at index. 565 * If there are no stale entries, just insert a hole at index.
276 */ 566 */
277 lep = &leaf->ents[index]; 567 lep = &ents[index];
278 if (index < be16_to_cpu(leaf->hdr.count)) 568 if (index < leafhdr->count)
279 memmove(lep + 1, lep, 569 memmove(lep + 1, lep,
280 (be16_to_cpu(leaf->hdr.count) - index) * 570 (leafhdr->count - index) * sizeof(*lep));
281 sizeof(*lep));
282 571
283 /* 572 /*
284 * Record low and high logging indices for the leaf. 573 * Record low and high logging indices for the leaf.
285 */ 574 */
286 *lfloglow = index; 575 *lfloglow = index;
287 *lfloghigh = be16_to_cpu(leaf->hdr.count); 576 *lfloghigh = leafhdr->count++;
288 be16_add_cpu(&leaf->hdr.count, 1);
289 return lep; 577 return lep;
290 } 578 }
291 579
@@ -299,16 +587,17 @@ xfs_dir2_leaf_find_entry(
299 * entries before and after our insertion point. 587 * entries before and after our insertion point.
300 */ 588 */
301 if (compact == 0) 589 if (compact == 0)
302 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); 590 xfs_dir3_leaf_find_stale(leafhdr, ents, index,
591 &lowstale, &highstale);
303 592
304 /* 593 /*
305 * If the low one is better, use it. 594 * If the low one is better, use it.
306 */ 595 */
307 if (lowstale >= 0 && 596 if (lowstale >= 0 &&
308 (highstale == be16_to_cpu(leaf->hdr.count) || 597 (highstale == leafhdr->count ||
309 index - lowstale - 1 < highstale - index)) { 598 index - lowstale - 1 < highstale - index)) {
310 ASSERT(index - lowstale - 1 >= 0); 599 ASSERT(index - lowstale - 1 >= 0);
311 ASSERT(leaf->ents[lowstale].address == 600 ASSERT(ents[lowstale].address ==
312 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); 601 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
313 602
314 /* 603 /*
@@ -316,37 +605,34 @@ xfs_dir2_leaf_find_entry(
316 * for the new entry. 605 * for the new entry.
317 */ 606 */
318 if (index - lowstale - 1 > 0) { 607 if (index - lowstale - 1 > 0) {
319 memmove(&leaf->ents[lowstale], 608 memmove(&ents[lowstale], &ents[lowstale + 1],
320 &leaf->ents[lowstale + 1],
321 (index - lowstale - 1) * 609 (index - lowstale - 1) *
322 sizeof(xfs_dir2_leaf_entry_t)); 610 sizeof(xfs_dir2_leaf_entry_t));
323 } 611 }
324 *lfloglow = MIN(lowstale, *lfloglow); 612 *lfloglow = MIN(lowstale, *lfloglow);
325 *lfloghigh = MAX(index - 1, *lfloghigh); 613 *lfloghigh = MAX(index - 1, *lfloghigh);
326 be16_add_cpu(&leaf->hdr.stale, -1); 614 leafhdr->stale--;
327 return &leaf->ents[index - 1]; 615 return &ents[index - 1];
328 } 616 }
329 617
330 /* 618 /*
331 * The high one is better, so use that one. 619 * The high one is better, so use that one.
332 */ 620 */
333 ASSERT(highstale - index >= 0); 621 ASSERT(highstale - index >= 0);
334 ASSERT(leaf->ents[highstale].address == 622 ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
335 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
336 623
337 /* 624 /*
338 * Copy entries down to cover the stale entry and make room for the 625 * Copy entries down to cover the stale entry and make room for the
339 * new entry. 626 * new entry.
340 */ 627 */
341 if (highstale - index > 0) { 628 if (highstale - index > 0) {
342 memmove(&leaf->ents[index + 1], 629 memmove(&ents[index + 1], &ents[index],
343 &leaf->ents[index],
344 (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); 630 (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
345 } 631 }
346 *lfloglow = MIN(index, *lfloglow); 632 *lfloglow = MIN(index, *lfloglow);
347 *lfloghigh = MAX(highstale, *lfloghigh); 633 *lfloghigh = MAX(highstale, *lfloghigh);
348 be16_add_cpu(&leaf->hdr.stale, -1); 634 leafhdr->stale--;
349 return &leaf->ents[index]; 635 return &ents[index];
350} 636}
351 637
352/* 638/*
@@ -383,6 +669,9 @@ xfs_dir2_leaf_addname(
383 __be16 *tagp; /* end of data entry */ 669 __be16 *tagp; /* end of data entry */
384 xfs_trans_t *tp; /* transaction pointer */ 670 xfs_trans_t *tp; /* transaction pointer */
385 xfs_dir2_db_t use_block; /* data block number */ 671 xfs_dir2_db_t use_block; /* data block number */
672 struct xfs_dir2_data_free *bf; /* bestfree table */
673 struct xfs_dir2_leaf_entry *ents;
674 struct xfs_dir3_icleaf_hdr leafhdr;
386 675
387 trace_xfs_dir2_leaf_addname(args); 676 trace_xfs_dir2_leaf_addname(args);
388 677
@@ -390,7 +679,7 @@ xfs_dir2_leaf_addname(
390 tp = args->trans; 679 tp = args->trans;
391 mp = dp->i_mount; 680 mp = dp->i_mount;
392 681
393 error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); 682 error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
394 if (error) 683 if (error)
395 return error; 684 return error;
396 685
@@ -403,16 +692,19 @@ xfs_dir2_leaf_addname(
403 index = xfs_dir2_leaf_search_hash(args, lbp); 692 index = xfs_dir2_leaf_search_hash(args, lbp);
404 leaf = lbp->b_addr; 693 leaf = lbp->b_addr;
405 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 694 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
695 ents = xfs_dir3_leaf_ents_p(leaf);
696 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
406 bestsp = xfs_dir2_leaf_bests_p(ltp); 697 bestsp = xfs_dir2_leaf_bests_p(ltp);
407 length = xfs_dir2_data_entsize(args->namelen); 698 length = xfs_dir2_data_entsize(args->namelen);
699
408 /* 700 /*
409 * See if there are any entries with the same hash value 701 * See if there are any entries with the same hash value
410 * and space in their block for the new entry. 702 * and space in their block for the new entry.
411 * This is good because it puts multiple same-hash value entries 703 * This is good because it puts multiple same-hash value entries
412 * in a data block, improving the lookup of those entries. 704 * in a data block, improving the lookup of those entries.
413 */ 705 */
414 for (use_block = -1, lep = &leaf->ents[index]; 706 for (use_block = -1, lep = &ents[index];
415 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 707 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
416 index++, lep++) { 708 index++, lep++) {
417 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) 709 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
418 continue; 710 continue;
@@ -445,7 +737,7 @@ xfs_dir2_leaf_addname(
445 * How many bytes do we need in the leaf block? 737 * How many bytes do we need in the leaf block?
446 */ 738 */
447 needbytes = 0; 739 needbytes = 0;
448 if (!leaf->hdr.stale) 740 if (!leafhdr.stale)
449 needbytes += sizeof(xfs_dir2_leaf_entry_t); 741 needbytes += sizeof(xfs_dir2_leaf_entry_t);
450 if (use_block == -1) 742 if (use_block == -1)
451 needbytes += sizeof(xfs_dir2_data_off_t); 743 needbytes += sizeof(xfs_dir2_data_off_t);
@@ -460,16 +752,15 @@ xfs_dir2_leaf_addname(
460 * If we don't have enough free bytes but we can make enough 752 * If we don't have enough free bytes but we can make enough
461 * by compacting out stale entries, we'll do that. 753 * by compacting out stale entries, we'll do that.
462 */ 754 */
463 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < 755 if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes &&
464 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) { 756 leafhdr.stale > 1)
465 compact = 1; 757 compact = 1;
466 } 758
467 /* 759 /*
468 * Otherwise if we don't have enough free bytes we need to 760 * Otherwise if we don't have enough free bytes we need to
469 * convert to node form. 761 * convert to node form.
470 */ 762 */
471 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu( 763 else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) {
472 leaf->hdr.count)] < needbytes) {
473 /* 764 /*
474 * Just checking or no space reservation, give up. 765 * Just checking or no space reservation, give up.
475 */ 766 */
@@ -517,15 +808,15 @@ xfs_dir2_leaf_addname(
517 * point later. 808 * point later.
518 */ 809 */
519 if (compact) { 810 if (compact) {
520 xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale, 811 xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
521 &lfloglow, &lfloghigh); 812 &highstale, &lfloglow, &lfloghigh);
522 } 813 }
523 /* 814 /*
524 * There are stale entries, so we'll need log-low and log-high 815 * There are stale entries, so we'll need log-low and log-high
525 * impossibly bad values later. 816 * impossibly bad values later.
526 */ 817 */
527 else if (be16_to_cpu(leaf->hdr.stale)) { 818 else if (leafhdr.stale) {
528 lfloglow = be16_to_cpu(leaf->hdr.count); 819 lfloglow = leafhdr.count;
529 lfloghigh = -1; 820 lfloghigh = -1;
530 } 821 }
531 /* 822 /*
@@ -544,7 +835,7 @@ xfs_dir2_leaf_addname(
544 /* 835 /*
545 * Initialize the block. 836 * Initialize the block.
546 */ 837 */
547 if ((error = xfs_dir2_data_init(args, use_block, &dbp))) { 838 if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
548 xfs_trans_brelse(tp, lbp); 839 xfs_trans_brelse(tp, lbp);
549 return error; 840 return error;
550 } 841 }
@@ -557,23 +848,24 @@ xfs_dir2_leaf_addname(
557 memmove(&bestsp[0], &bestsp[1], 848 memmove(&bestsp[0], &bestsp[1],
558 be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); 849 be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
559 be32_add_cpu(&ltp->bestcount, 1); 850 be32_add_cpu(&ltp->bestcount, 1);
560 xfs_dir2_leaf_log_tail(tp, lbp); 851 xfs_dir3_leaf_log_tail(tp, lbp);
561 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 852 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
562 } 853 }
563 /* 854 /*
564 * If we're filling in a previously empty block just log it. 855 * If we're filling in a previously empty block just log it.
565 */ 856 */
566 else 857 else
567 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 858 xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
568 hdr = dbp->b_addr; 859 hdr = dbp->b_addr;
569 bestsp[use_block] = hdr->bestfree[0].length; 860 bf = xfs_dir3_data_bestfree_p(hdr);
861 bestsp[use_block] = bf[0].length;
570 grown = 1; 862 grown = 1;
571 } else { 863 } else {
572 /* 864 /*
573 * Already had space in some data block. 865 * Already had space in some data block.
574 * Just read that one in. 866 * Just read that one in.
575 */ 867 */
576 error = xfs_dir2_data_read(tp, dp, 868 error = xfs_dir3_data_read(tp, dp,
577 xfs_dir2_db_to_da(mp, use_block), 869 xfs_dir2_db_to_da(mp, use_block),
578 -1, &dbp); 870 -1, &dbp);
579 if (error) { 871 if (error) {
@@ -581,13 +873,14 @@ xfs_dir2_leaf_addname(
581 return error; 873 return error;
582 } 874 }
583 hdr = dbp->b_addr; 875 hdr = dbp->b_addr;
876 bf = xfs_dir3_data_bestfree_p(hdr);
584 grown = 0; 877 grown = 0;
585 } 878 }
586 /* 879 /*
587 * Point to the biggest freespace in our data block. 880 * Point to the biggest freespace in our data block.
588 */ 881 */
589 dup = (xfs_dir2_data_unused_t *) 882 dup = (xfs_dir2_data_unused_t *)
590 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); 883 ((char *)hdr + be16_to_cpu(bf[0].offset));
591 ASSERT(be16_to_cpu(dup->length) >= length); 884 ASSERT(be16_to_cpu(dup->length) >= length);
592 needscan = needlog = 0; 885 needscan = needlog = 0;
593 /* 886 /*
@@ -620,13 +913,13 @@ xfs_dir2_leaf_addname(
620 * If the bests table needs to be changed, do it. 913 * If the bests table needs to be changed, do it.
621 * Log the change unless we've already done that. 914 * Log the change unless we've already done that.
622 */ 915 */
623 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) { 916 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
624 bestsp[use_block] = hdr->bestfree[0].length; 917 bestsp[use_block] = bf[0].length;
625 if (!grown) 918 if (!grown)
626 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 919 xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
627 } 920 }
628 921
629 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, 922 lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
630 highstale, &lfloglow, &lfloghigh); 923 highstale, &lfloglow, &lfloghigh);
631 924
632 /* 925 /*
@@ -638,82 +931,40 @@ xfs_dir2_leaf_addname(
638 /* 931 /*
639 * Log the leaf fields and give up the buffers. 932 * Log the leaf fields and give up the buffers.
640 */ 933 */
641 xfs_dir2_leaf_log_header(tp, lbp); 934 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
642 xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); 935 xfs_dir3_leaf_log_header(tp, lbp);
643 xfs_dir2_leaf_check(dp, lbp); 936 xfs_dir3_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
644 xfs_dir2_data_check(dp, dbp); 937 xfs_dir3_leaf_check(mp, lbp);
938 xfs_dir3_data_check(dp, dbp);
645 return 0; 939 return 0;
646} 940}
647 941
648#ifdef DEBUG
649/*
650 * Check the internal consistency of a leaf1 block.
651 * Pop an assert if something is wrong.
652 */
653STATIC void
654xfs_dir2_leaf_check(
655 struct xfs_inode *dp, /* incore directory inode */
656 struct xfs_buf *bp) /* leaf's buffer */
657{
658 int i; /* leaf index */
659 xfs_dir2_leaf_t *leaf; /* leaf structure */
660 xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */
661 xfs_mount_t *mp; /* filesystem mount point */
662 int stale; /* count of stale leaves */
663
664 leaf = bp->b_addr;
665 mp = dp->i_mount;
666 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
667 /*
668 * This value is not restrictive enough.
669 * Should factor in the size of the bests table as well.
670 * We can deduce a value for that from di_size.
671 */
672 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
673 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
674 /*
675 * Leaves and bests don't overlap.
676 */
677 ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
678 (char *)xfs_dir2_leaf_bests_p(ltp));
679 /*
680 * Check hash value order, count stale entries.
681 */
682 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
683 if (i + 1 < be16_to_cpu(leaf->hdr.count))
684 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
685 be32_to_cpu(leaf->ents[i + 1].hashval));
686 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
687 stale++;
688 }
689 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
690}
691#endif /* DEBUG */
692
693/* 942/*
694 * Compact out any stale entries in the leaf. 943 * Compact out any stale entries in the leaf.
695 * Log the header and changed leaf entries, if any. 944 * Log the header and changed leaf entries, if any.
696 */ 945 */
697void 946void
698xfs_dir2_leaf_compact( 947xfs_dir3_leaf_compact(
699 xfs_da_args_t *args, /* operation arguments */ 948 xfs_da_args_t *args, /* operation arguments */
949 struct xfs_dir3_icleaf_hdr *leafhdr,
700 struct xfs_buf *bp) /* leaf buffer */ 950 struct xfs_buf *bp) /* leaf buffer */
701{ 951{
702 int from; /* source leaf index */ 952 int from; /* source leaf index */
703 xfs_dir2_leaf_t *leaf; /* leaf structure */ 953 xfs_dir2_leaf_t *leaf; /* leaf structure */
704 int loglow; /* first leaf entry to log */ 954 int loglow; /* first leaf entry to log */
705 int to; /* target leaf index */ 955 int to; /* target leaf index */
956 struct xfs_dir2_leaf_entry *ents;
706 957
707 leaf = bp->b_addr; 958 leaf = bp->b_addr;
708 if (!leaf->hdr.stale) { 959 if (!leafhdr->stale)
709 return; 960 return;
710 } 961
711 /* 962 /*
712 * Compress out the stale entries in place. 963 * Compress out the stale entries in place.
713 */ 964 */
714 for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { 965 ents = xfs_dir3_leaf_ents_p(leaf);
715 if (leaf->ents[from].address == 966 for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
716 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 967 if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
717 continue; 968 continue;
718 /* 969 /*
719 * Only actually copy the entries that are different. 970 * Only actually copy the entries that are different.
@@ -721,19 +972,21 @@ xfs_dir2_leaf_compact(
721 if (from > to) { 972 if (from > to) {
722 if (loglow == -1) 973 if (loglow == -1)
723 loglow = to; 974 loglow = to;
724 leaf->ents[to] = leaf->ents[from]; 975 ents[to] = ents[from];
725 } 976 }
726 to++; 977 to++;
727 } 978 }
728 /* 979 /*
729 * Update and log the header, log the leaf entries. 980 * Update and log the header, log the leaf entries.
730 */ 981 */
731 ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to); 982 ASSERT(leafhdr->stale == from - to);
732 be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); 983 leafhdr->count -= leafhdr->stale;
733 leaf->hdr.stale = 0; 984 leafhdr->stale = 0;
734 xfs_dir2_leaf_log_header(args->trans, bp); 985
986 xfs_dir3_leaf_hdr_to_disk(leaf, leafhdr);
987 xfs_dir3_leaf_log_header(args->trans, bp);
735 if (loglow != -1) 988 if (loglow != -1)
736 xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1); 989 xfs_dir3_leaf_log_ents(args->trans, bp, loglow, to - 1);
737} 990}
738 991
739/* 992/*
@@ -745,8 +998,9 @@ xfs_dir2_leaf_compact(
745 * and leaf logging indices. 998 * and leaf logging indices.
746 */ 999 */
747void 1000void
748xfs_dir2_leaf_compact_x1( 1001xfs_dir3_leaf_compact_x1(
749 struct xfs_buf *bp, /* leaf buffer */ 1002 struct xfs_dir3_icleaf_hdr *leafhdr,
1003 struct xfs_dir2_leaf_entry *ents,
750 int *indexp, /* insertion index */ 1004 int *indexp, /* insertion index */
751 int *lowstalep, /* out: stale entry before us */ 1005 int *lowstalep, /* out: stale entry before us */
752 int *highstalep, /* out: stale entry after us */ 1006 int *highstalep, /* out: stale entry after us */
@@ -757,22 +1011,20 @@ xfs_dir2_leaf_compact_x1(
757 int highstale; /* stale entry at/after index */ 1011 int highstale; /* stale entry at/after index */
758 int index; /* insertion index */ 1012 int index; /* insertion index */
759 int keepstale; /* source index of kept stale */ 1013 int keepstale; /* source index of kept stale */
760 xfs_dir2_leaf_t *leaf; /* leaf structure */
761 int lowstale; /* stale entry before index */ 1014 int lowstale; /* stale entry before index */
762 int newindex=0; /* new insertion index */ 1015 int newindex=0; /* new insertion index */
763 int to; /* destination copy index */ 1016 int to; /* destination copy index */
764 1017
765 leaf = bp->b_addr; 1018 ASSERT(leafhdr->stale > 1);
766 ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
767 index = *indexp; 1019 index = *indexp;
768 1020
769 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); 1021 xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale);
770 1022
771 /* 1023 /*
772 * Pick the better of lowstale and highstale. 1024 * Pick the better of lowstale and highstale.
773 */ 1025 */
774 if (lowstale >= 0 && 1026 if (lowstale >= 0 &&
775 (highstale == be16_to_cpu(leaf->hdr.count) || 1027 (highstale == leafhdr->count ||
776 index - lowstale <= highstale - index)) 1028 index - lowstale <= highstale - index))
777 keepstale = lowstale; 1029 keepstale = lowstale;
778 else 1030 else
@@ -781,15 +1033,14 @@ xfs_dir2_leaf_compact_x1(
781 * Copy the entries in place, removing all the stale entries 1033 * Copy the entries in place, removing all the stale entries
782 * except keepstale. 1034 * except keepstale.
783 */ 1035 */
784 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 1036 for (from = to = 0; from < leafhdr->count; from++) {
785 /* 1037 /*
786 * Notice the new value of index. 1038 * Notice the new value of index.
787 */ 1039 */
788 if (index == from) 1040 if (index == from)
789 newindex = to; 1041 newindex = to;
790 if (from != keepstale && 1042 if (from != keepstale &&
791 leaf->ents[from].address == 1043 ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
792 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
793 if (from == to) 1044 if (from == to)
794 *lowlogp = to; 1045 *lowlogp = to;
795 continue; 1046 continue;
@@ -803,7 +1054,7 @@ xfs_dir2_leaf_compact_x1(
803 * Copy only the entries that have moved. 1054 * Copy only the entries that have moved.
804 */ 1055 */
805 if (from > to) 1056 if (from > to)
806 leaf->ents[to] = leaf->ents[from]; 1057 ents[to] = ents[from];
807 to++; 1058 to++;
808 } 1059 }
809 ASSERT(from > to); 1060 ASSERT(from > to);
@@ -817,8 +1068,8 @@ xfs_dir2_leaf_compact_x1(
817 /* 1068 /*
818 * Adjust the leaf header values. 1069 * Adjust the leaf header values.
819 */ 1070 */
820 be16_add_cpu(&leaf->hdr.count, -(from - to)); 1071 leafhdr->count -= from - to;
821 leaf->hdr.stale = cpu_to_be16(1); 1072 leafhdr->stale = 1;
822 /* 1073 /*
823 * Remember the low/high stale value only in the "right" 1074 * Remember the low/high stale value only in the "right"
824 * direction. 1075 * direction.
@@ -826,8 +1077,8 @@ xfs_dir2_leaf_compact_x1(
826 if (lowstale >= newindex) 1077 if (lowstale >= newindex)
827 lowstale = -1; 1078 lowstale = -1;
828 else 1079 else
829 highstale = be16_to_cpu(leaf->hdr.count); 1080 highstale = leafhdr->count;
830 *highlogp = be16_to_cpu(leaf->hdr.count) - 1; 1081 *highlogp = leafhdr->count - 1;
831 *lowstalep = lowstale; 1082 *lowstalep = lowstale;
832 *highstalep = highstale; 1083 *highstalep = highstale;
833} 1084}
@@ -965,7 +1216,7 @@ xfs_dir2_leaf_readbuf(
965 * Read the directory block starting at the first mapping. 1216 * Read the directory block starting at the first mapping.
966 */ 1217 */
967 mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); 1218 mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
968 error = xfs_dir2_data_read(NULL, dp, map->br_startoff, 1219 error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
969 map->br_blockcount >= mp->m_dirblkfsbs ? 1220 map->br_blockcount >= mp->m_dirblkfsbs ?
970 XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp); 1221 XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
971 1222
@@ -994,7 +1245,7 @@ xfs_dir2_leaf_readbuf(
994 */ 1245 */
995 if (i > mip->ra_current && 1246 if (i > mip->ra_current &&
996 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { 1247 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
997 xfs_dir2_data_readahead(NULL, dp, 1248 xfs_dir3_data_readahead(NULL, dp,
998 map[mip->ra_index].br_startoff + mip->ra_offset, 1249 map[mip->ra_index].br_startoff + mip->ra_offset,
999 XFS_FSB_TO_DADDR(mp, 1250 XFS_FSB_TO_DADDR(mp,
1000 map[mip->ra_index].br_startblock + 1251 map[mip->ra_index].br_startblock +
@@ -1007,7 +1258,7 @@ xfs_dir2_leaf_readbuf(
1007 * use our mapping, but this is a very rare case. 1258 * use our mapping, but this is a very rare case.
1008 */ 1259 */
1009 else if (i > mip->ra_current) { 1260 else if (i > mip->ra_current) {
1010 xfs_dir2_data_readahead(NULL, dp, 1261 xfs_dir3_data_readahead(NULL, dp,
1011 map[mip->ra_index].br_startoff + 1262 map[mip->ra_index].br_startoff +
1012 mip->ra_offset, -1); 1263 mip->ra_offset, -1);
1013 mip->ra_current = i; 1264 mip->ra_current = i;
@@ -1133,17 +1384,17 @@ xfs_dir2_leaf_getdents(
1133 ASSERT(xfs_dir2_byte_to_db(mp, curoff) == 1384 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1134 map_info->curdb); 1385 map_info->curdb);
1135 hdr = bp->b_addr; 1386 hdr = bp->b_addr;
1136 xfs_dir2_data_check(dp, bp); 1387 xfs_dir3_data_check(dp, bp);
1137 /* 1388 /*
1138 * Find our position in the block. 1389 * Find our position in the block.
1139 */ 1390 */
1140 ptr = (char *)(hdr + 1); 1391 ptr = (char *)xfs_dir3_data_entry_p(hdr);
1141 byteoff = xfs_dir2_byte_to_off(mp, curoff); 1392 byteoff = xfs_dir2_byte_to_off(mp, curoff);
1142 /* 1393 /*
1143 * Skip past the header. 1394 * Skip past the header.
1144 */ 1395 */
1145 if (byteoff == 0) 1396 if (byteoff == 0)
1146 curoff += (uint)sizeof(*hdr); 1397 curoff += xfs_dir3_data_entry_offset(hdr);
1147 /* 1398 /*
1148 * Skip past entries until we reach our offset. 1399 * Skip past entries until we reach our offset.
1149 */ 1400 */
@@ -1220,69 +1471,12 @@ xfs_dir2_leaf_getdents(
1220 return error; 1471 return error;
1221} 1472}
1222 1473
1223/*
1224 * Initialize a new leaf block, leaf1 or leafn magic accepted.
1225 */
1226int
1227xfs_dir2_leaf_init(
1228 xfs_da_args_t *args, /* operation arguments */
1229 xfs_dir2_db_t bno, /* directory block number */
1230 struct xfs_buf **bpp, /* out: leaf buffer */
1231 int magic) /* magic number for block */
1232{
1233 struct xfs_buf *bp; /* leaf buffer */
1234 xfs_inode_t *dp; /* incore directory inode */
1235 int error; /* error return code */
1236 xfs_dir2_leaf_t *leaf; /* leaf structure */
1237 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1238 xfs_mount_t *mp; /* filesystem mount point */
1239 xfs_trans_t *tp; /* transaction pointer */
1240
1241 dp = args->dp;
1242 ASSERT(dp != NULL);
1243 tp = args->trans;
1244 mp = dp->i_mount;
1245 ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1246 bno < XFS_DIR2_FREE_FIRSTDB(mp));
1247 /*
1248 * Get the buffer for the block.
1249 */
1250 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1251 XFS_DATA_FORK);
1252 if (error)
1253 return error;
1254
1255 /*
1256 * Initialize the header.
1257 */
1258 leaf = bp->b_addr;
1259 leaf->hdr.info.magic = cpu_to_be16(magic);
1260 leaf->hdr.info.forw = 0;
1261 leaf->hdr.info.back = 0;
1262 leaf->hdr.count = 0;
1263 leaf->hdr.stale = 0;
1264 xfs_dir2_leaf_log_header(tp, bp);
1265 /*
1266 * If it's a leaf-format directory initialize the tail.
1267 * In this case our caller has the real bests table to copy into
1268 * the block.
1269 */
1270 if (magic == XFS_DIR2_LEAF1_MAGIC) {
1271 bp->b_ops = &xfs_dir2_leaf1_buf_ops;
1272 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1273 ltp->bestcount = 0;
1274 xfs_dir2_leaf_log_tail(tp, bp);
1275 } else
1276 bp->b_ops = &xfs_dir2_leafn_buf_ops;
1277 *bpp = bp;
1278 return 0;
1279}
1280 1474
1281/* 1475/*
1282 * Log the bests entries indicated from a leaf1 block. 1476 * Log the bests entries indicated from a leaf1 block.
1283 */ 1477 */
1284static void 1478static void
1285xfs_dir2_leaf_log_bests( 1479xfs_dir3_leaf_log_bests(
1286 xfs_trans_t *tp, /* transaction pointer */ 1480 xfs_trans_t *tp, /* transaction pointer */
1287 struct xfs_buf *bp, /* leaf buffer */ 1481 struct xfs_buf *bp, /* leaf buffer */
1288 int first, /* first entry to log */ 1482 int first, /* first entry to log */
@@ -1290,11 +1484,12 @@ xfs_dir2_leaf_log_bests(
1290{ 1484{
1291 __be16 *firstb; /* pointer to first entry */ 1485 __be16 *firstb; /* pointer to first entry */
1292 __be16 *lastb; /* pointer to last entry */ 1486 __be16 *lastb; /* pointer to last entry */
1293 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1487 struct xfs_dir2_leaf *leaf = bp->b_addr;
1294 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ 1488 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1295 1489
1296 leaf = bp->b_addr; 1490 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1297 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 1491 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
1492
1298 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); 1493 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1299 firstb = xfs_dir2_leaf_bests_p(ltp) + first; 1494 firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1300 lastb = xfs_dir2_leaf_bests_p(ltp) + last; 1495 lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1306,7 +1501,7 @@ xfs_dir2_leaf_log_bests(
1306 * Log the leaf entries indicated from a leaf1 or leafn block. 1501 * Log the leaf entries indicated from a leaf1 or leafn block.
1307 */ 1502 */
1308void 1503void
1309xfs_dir2_leaf_log_ents( 1504xfs_dir3_leaf_log_ents(
1310 xfs_trans_t *tp, /* transaction pointer */ 1505 xfs_trans_t *tp, /* transaction pointer */
1311 struct xfs_buf *bp, /* leaf buffer */ 1506 struct xfs_buf *bp, /* leaf buffer */
1312 int first, /* first entry to log */ 1507 int first, /* first entry to log */
@@ -1314,13 +1509,17 @@ xfs_dir2_leaf_log_ents(
1314{ 1509{
1315 xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ 1510 xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */
1316 xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ 1511 xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */
1317 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1512 struct xfs_dir2_leaf *leaf = bp->b_addr;
1513 struct xfs_dir2_leaf_entry *ents;
1318 1514
1319 leaf = bp->b_addr;
1320 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || 1515 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1321 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1516 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1322 firstlep = &leaf->ents[first]; 1517 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1323 lastlep = &leaf->ents[last]; 1518 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1519
1520 ents = xfs_dir3_leaf_ents_p(leaf);
1521 firstlep = &ents[first];
1522 lastlep = &ents[last];
1324 xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), 1523 xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1325 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); 1524 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1326} 1525}
@@ -1329,34 +1528,38 @@ xfs_dir2_leaf_log_ents(
1329 * Log the header of the leaf1 or leafn block. 1528 * Log the header of the leaf1 or leafn block.
1330 */ 1529 */
1331void 1530void
1332xfs_dir2_leaf_log_header( 1531xfs_dir3_leaf_log_header(
1333 struct xfs_trans *tp, 1532 struct xfs_trans *tp,
1334 struct xfs_buf *bp) 1533 struct xfs_buf *bp)
1335{ 1534{
1336 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1535 struct xfs_dir2_leaf *leaf = bp->b_addr;
1337 1536
1338 leaf = bp->b_addr;
1339 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || 1537 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1340 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1538 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1539 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1540 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1541
1341 xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), 1542 xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1342 (uint)(sizeof(leaf->hdr) - 1)); 1543 xfs_dir3_leaf_hdr_size(leaf) - 1);
1343} 1544}
1344 1545
1345/* 1546/*
1346 * Log the tail of the leaf1 block. 1547 * Log the tail of the leaf1 block.
1347 */ 1548 */
1348STATIC void 1549STATIC void
1349xfs_dir2_leaf_log_tail( 1550xfs_dir3_leaf_log_tail(
1350 struct xfs_trans *tp, 1551 struct xfs_trans *tp,
1351 struct xfs_buf *bp) 1552 struct xfs_buf *bp)
1352{ 1553{
1353 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1554 struct xfs_dir2_leaf *leaf = bp->b_addr;
1354 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ 1555 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1355 xfs_mount_t *mp; /* filesystem mount point */ 1556 struct xfs_mount *mp = tp->t_mountp;
1557
1558 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1559 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1560 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1561 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1356 1562
1357 mp = tp->t_mountp;
1358 leaf = bp->b_addr;
1359 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1360 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1563 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1361 xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), 1564 xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1362 (uint)(mp->m_dirblksize - 1)); 1565 (uint)(mp->m_dirblksize - 1));
@@ -1380,6 +1583,7 @@ xfs_dir2_leaf_lookup(
1380 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1583 xfs_dir2_leaf_t *leaf; /* leaf structure */
1381 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1584 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1382 xfs_trans_t *tp; /* transaction pointer */ 1585 xfs_trans_t *tp; /* transaction pointer */
1586 struct xfs_dir2_leaf_entry *ents;
1383 1587
1384 trace_xfs_dir2_leaf_lookup(args); 1588 trace_xfs_dir2_leaf_lookup(args);
1385 1589
@@ -1391,12 +1595,14 @@ xfs_dir2_leaf_lookup(
1391 } 1595 }
1392 tp = args->trans; 1596 tp = args->trans;
1393 dp = args->dp; 1597 dp = args->dp;
1394 xfs_dir2_leaf_check(dp, lbp); 1598 xfs_dir3_leaf_check(dp->i_mount, lbp);
1395 leaf = lbp->b_addr; 1599 leaf = lbp->b_addr;
1600 ents = xfs_dir3_leaf_ents_p(leaf);
1396 /* 1601 /*
1397 * Get to the leaf entry and contained data entry address. 1602 * Get to the leaf entry and contained data entry address.
1398 */ 1603 */
1399 lep = &leaf->ents[index]; 1604 lep = &ents[index];
1605
1400 /* 1606 /*
1401 * Point to the data entry. 1607 * Point to the data entry.
1402 */ 1608 */
@@ -1440,18 +1646,23 @@ xfs_dir2_leaf_lookup_int(
1440 xfs_trans_t *tp; /* transaction pointer */ 1646 xfs_trans_t *tp; /* transaction pointer */
1441 xfs_dir2_db_t cidb = -1; /* case match data block no. */ 1647 xfs_dir2_db_t cidb = -1; /* case match data block no. */
1442 enum xfs_dacmp cmp; /* name compare result */ 1648 enum xfs_dacmp cmp; /* name compare result */
1649 struct xfs_dir2_leaf_entry *ents;
1650 struct xfs_dir3_icleaf_hdr leafhdr;
1443 1651
1444 dp = args->dp; 1652 dp = args->dp;
1445 tp = args->trans; 1653 tp = args->trans;
1446 mp = dp->i_mount; 1654 mp = dp->i_mount;
1447 1655
1448 error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); 1656 error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
1449 if (error) 1657 if (error)
1450 return error; 1658 return error;
1451 1659
1452 *lbpp = lbp; 1660 *lbpp = lbp;
1453 leaf = lbp->b_addr; 1661 leaf = lbp->b_addr;
1454 xfs_dir2_leaf_check(dp, lbp); 1662 xfs_dir3_leaf_check(mp, lbp);
1663 ents = xfs_dir3_leaf_ents_p(leaf);
1664 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1665
1455 /* 1666 /*
1456 * Look for the first leaf entry with our hash value. 1667 * Look for the first leaf entry with our hash value.
1457 */ 1668 */
@@ -1460,9 +1671,9 @@ xfs_dir2_leaf_lookup_int(
1460 * Loop over all the entries with the right hash value 1671 * Loop over all the entries with the right hash value
1461 * looking to match the name. 1672 * looking to match the name.
1462 */ 1673 */
1463 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && 1674 for (lep = &ents[index];
1464 be32_to_cpu(lep->hashval) == args->hashval; 1675 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
1465 lep++, index++) { 1676 lep++, index++) {
1466 /* 1677 /*
1467 * Skip over stale leaf entries. 1678 * Skip over stale leaf entries.
1468 */ 1679 */
@@ -1479,7 +1690,7 @@ xfs_dir2_leaf_lookup_int(
1479 if (newdb != curdb) { 1690 if (newdb != curdb) {
1480 if (dbp) 1691 if (dbp)
1481 xfs_trans_brelse(tp, dbp); 1692 xfs_trans_brelse(tp, dbp);
1482 error = xfs_dir2_data_read(tp, dp, 1693 error = xfs_dir3_data_read(tp, dp,
1483 xfs_dir2_db_to_da(mp, newdb), 1694 xfs_dir2_db_to_da(mp, newdb),
1484 -1, &dbp); 1695 -1, &dbp);
1485 if (error) { 1696 if (error) {
@@ -1520,7 +1731,7 @@ xfs_dir2_leaf_lookup_int(
1520 ASSERT(cidb != -1); 1731 ASSERT(cidb != -1);
1521 if (cidb != curdb) { 1732 if (cidb != curdb) {
1522 xfs_trans_brelse(tp, dbp); 1733 xfs_trans_brelse(tp, dbp);
1523 error = xfs_dir2_data_read(tp, dp, 1734 error = xfs_dir3_data_read(tp, dp,
1524 xfs_dir2_db_to_da(mp, cidb), 1735 xfs_dir2_db_to_da(mp, cidb),
1525 -1, &dbp); 1736 -1, &dbp);
1526 if (error) { 1737 if (error) {
@@ -1566,6 +1777,9 @@ xfs_dir2_leaf_removename(
1566 int needscan; /* need to rescan data frees */ 1777 int needscan; /* need to rescan data frees */
1567 xfs_dir2_data_off_t oldbest; /* old value of best free */ 1778 xfs_dir2_data_off_t oldbest; /* old value of best free */
1568 xfs_trans_t *tp; /* transaction pointer */ 1779 xfs_trans_t *tp; /* transaction pointer */
1780 struct xfs_dir2_data_free *bf; /* bestfree table */
1781 struct xfs_dir2_leaf_entry *ents;
1782 struct xfs_dir3_icleaf_hdr leafhdr;
1569 1783
1570 trace_xfs_dir2_leaf_removename(args); 1784 trace_xfs_dir2_leaf_removename(args);
1571 1785
@@ -1580,16 +1794,19 @@ xfs_dir2_leaf_removename(
1580 mp = dp->i_mount; 1794 mp = dp->i_mount;
1581 leaf = lbp->b_addr; 1795 leaf = lbp->b_addr;
1582 hdr = dbp->b_addr; 1796 hdr = dbp->b_addr;
1583 xfs_dir2_data_check(dp, dbp); 1797 xfs_dir3_data_check(dp, dbp);
1798 bf = xfs_dir3_data_bestfree_p(hdr);
1799 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1800 ents = xfs_dir3_leaf_ents_p(leaf);
1584 /* 1801 /*
1585 * Point to the leaf entry, use that to point to the data entry. 1802 * Point to the leaf entry, use that to point to the data entry.
1586 */ 1803 */
1587 lep = &leaf->ents[index]; 1804 lep = &ents[index];
1588 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); 1805 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1589 dep = (xfs_dir2_data_entry_t *) 1806 dep = (xfs_dir2_data_entry_t *)
1590 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); 1807 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1591 needscan = needlog = 0; 1808 needscan = needlog = 0;
1592 oldbest = be16_to_cpu(hdr->bestfree[0].length); 1809 oldbest = be16_to_cpu(bf[0].length);
1593 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1810 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1594 bestsp = xfs_dir2_leaf_bests_p(ltp); 1811 bestsp = xfs_dir2_leaf_bests_p(ltp);
1595 ASSERT(be16_to_cpu(bestsp[db]) == oldbest); 1812 ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1602,10 +1819,13 @@ xfs_dir2_leaf_removename(
1602 /* 1819 /*
1603 * We just mark the leaf entry stale by putting a null in it. 1820 * We just mark the leaf entry stale by putting a null in it.
1604 */ 1821 */
1605 be16_add_cpu(&leaf->hdr.stale, 1); 1822 leafhdr.stale++;
1606 xfs_dir2_leaf_log_header(tp, lbp); 1823 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
1824 xfs_dir3_leaf_log_header(tp, lbp);
1825
1607 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); 1826 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1608 xfs_dir2_leaf_log_ents(tp, lbp, index, index); 1827 xfs_dir3_leaf_log_ents(tp, lbp, index, index);
1828
1609 /* 1829 /*
1610 * Scan the freespace in the data block again if necessary, 1830 * Scan the freespace in the data block again if necessary,
1611 * log the data block header if necessary. 1831 * log the data block header if necessary.
@@ -1618,16 +1838,16 @@ xfs_dir2_leaf_removename(
1618 * If the longest freespace in the data block has changed, 1838 * If the longest freespace in the data block has changed,
1619 * put the new value in the bests table and log that. 1839 * put the new value in the bests table and log that.
1620 */ 1840 */
1621 if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) { 1841 if (be16_to_cpu(bf[0].length) != oldbest) {
1622 bestsp[db] = hdr->bestfree[0].length; 1842 bestsp[db] = bf[0].length;
1623 xfs_dir2_leaf_log_bests(tp, lbp, db, db); 1843 xfs_dir3_leaf_log_bests(tp, lbp, db, db);
1624 } 1844 }
1625 xfs_dir2_data_check(dp, dbp); 1845 xfs_dir3_data_check(dp, dbp);
1626 /* 1846 /*
1627 * If the data block is now empty then get rid of the data block. 1847 * If the data block is now empty then get rid of the data block.
1628 */ 1848 */
1629 if (be16_to_cpu(hdr->bestfree[0].length) == 1849 if (be16_to_cpu(bf[0].length) ==
1630 mp->m_dirblksize - (uint)sizeof(*hdr)) { 1850 mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) {
1631 ASSERT(db != mp->m_dirdatablk); 1851 ASSERT(db != mp->m_dirdatablk);
1632 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { 1852 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1633 /* 1853 /*
@@ -1638,7 +1858,7 @@ xfs_dir2_leaf_removename(
1638 */ 1858 */
1639 if (error == ENOSPC && args->total == 0) 1859 if (error == ENOSPC && args->total == 0)
1640 error = 0; 1860 error = 0;
1641 xfs_dir2_leaf_check(dp, lbp); 1861 xfs_dir3_leaf_check(mp, lbp);
1642 return error; 1862 return error;
1643 } 1863 }
1644 dbp = NULL; 1864 dbp = NULL;
@@ -1661,8 +1881,8 @@ xfs_dir2_leaf_removename(
1661 memmove(&bestsp[db - i], bestsp, 1881 memmove(&bestsp[db - i], bestsp,
1662 (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); 1882 (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1663 be32_add_cpu(&ltp->bestcount, -(db - i)); 1883 be32_add_cpu(&ltp->bestcount, -(db - i));
1664 xfs_dir2_leaf_log_tail(tp, lbp); 1884 xfs_dir3_leaf_log_tail(tp, lbp);
1665 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 1885 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1666 } else 1886 } else
1667 bestsp[db] = cpu_to_be16(NULLDATAOFF); 1887 bestsp[db] = cpu_to_be16(NULLDATAOFF);
1668 } 1888 }
@@ -1672,7 +1892,7 @@ xfs_dir2_leaf_removename(
1672 else if (db != mp->m_dirdatablk) 1892 else if (db != mp->m_dirdatablk)
1673 dbp = NULL; 1893 dbp = NULL;
1674 1894
1675 xfs_dir2_leaf_check(dp, lbp); 1895 xfs_dir3_leaf_check(mp, lbp);
1676 /* 1896 /*
1677 * See if we can convert to block form. 1897 * See if we can convert to block form.
1678 */ 1898 */
@@ -1695,6 +1915,7 @@ xfs_dir2_leaf_replace(
1695 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1915 xfs_dir2_leaf_t *leaf; /* leaf structure */
1696 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1916 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1697 xfs_trans_t *tp; /* transaction pointer */ 1917 xfs_trans_t *tp; /* transaction pointer */
1918 struct xfs_dir2_leaf_entry *ents;
1698 1919
1699 trace_xfs_dir2_leaf_replace(args); 1920 trace_xfs_dir2_leaf_replace(args);
1700 1921
@@ -1706,10 +1927,11 @@ xfs_dir2_leaf_replace(
1706 } 1927 }
1707 dp = args->dp; 1928 dp = args->dp;
1708 leaf = lbp->b_addr; 1929 leaf = lbp->b_addr;
1930 ents = xfs_dir3_leaf_ents_p(leaf);
1709 /* 1931 /*
1710 * Point to the leaf entry, get data address from it. 1932 * Point to the leaf entry, get data address from it.
1711 */ 1933 */
1712 lep = &leaf->ents[index]; 1934 lep = &ents[index];
1713 /* 1935 /*
1714 * Point to the data entry. 1936 * Point to the data entry.
1715 */ 1937 */
@@ -1723,7 +1945,7 @@ xfs_dir2_leaf_replace(
1723 dep->inumber = cpu_to_be64(args->inumber); 1945 dep->inumber = cpu_to_be64(args->inumber);
1724 tp = args->trans; 1946 tp = args->trans;
1725 xfs_dir2_data_log_entry(tp, dbp, dep); 1947 xfs_dir2_data_log_entry(tp, dbp, dep);
1726 xfs_dir2_leaf_check(dp, lbp); 1948 xfs_dir3_leaf_check(dp->i_mount, lbp);
1727 xfs_trans_brelse(tp, lbp); 1949 xfs_trans_brelse(tp, lbp);
1728 return 0; 1950 return 0;
1729} 1951}
@@ -1745,17 +1967,22 @@ xfs_dir2_leaf_search_hash(
1745 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1967 xfs_dir2_leaf_t *leaf; /* leaf structure */
1746 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1968 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1747 int mid=0; /* current leaf index */ 1969 int mid=0; /* current leaf index */
1970 struct xfs_dir2_leaf_entry *ents;
1971 struct xfs_dir3_icleaf_hdr leafhdr;
1748 1972
1749 leaf = lbp->b_addr; 1973 leaf = lbp->b_addr;
1974 ents = xfs_dir3_leaf_ents_p(leaf);
1975 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1976
1750#ifndef __KERNEL__ 1977#ifndef __KERNEL__
1751 if (!leaf->hdr.count) 1978 if (!leafhdr.count)
1752 return 0; 1979 return 0;
1753#endif 1980#endif
1754 /* 1981 /*
1755 * Note, the table cannot be empty, so we have to go through the loop. 1982 * Note, the table cannot be empty, so we have to go through the loop.
1756 * Binary search the leaf entries looking for our hash value. 1983 * Binary search the leaf entries looking for our hash value.
1757 */ 1984 */
1758 for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1, 1985 for (lep = ents, low = 0, high = leafhdr.count - 1,
1759 hashwant = args->hashval; 1986 hashwant = args->hashval;
1760 low <= high; ) { 1987 low <= high; ) {
1761 mid = (low + high) >> 1; 1988 mid = (low + high) >> 1;
@@ -1807,7 +2034,7 @@ xfs_dir2_leaf_trim_data(
1807 /* 2034 /*
1808 * Read the offending data block. We need its buffer. 2035 * Read the offending data block. We need its buffer.
1809 */ 2036 */
1810 error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); 2037 error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
1811 if (error) 2038 if (error)
1812 return error; 2039 return error;
1813 2040
@@ -1817,10 +2044,12 @@ xfs_dir2_leaf_trim_data(
1817#ifdef DEBUG 2044#ifdef DEBUG
1818{ 2045{
1819 struct xfs_dir2_data_hdr *hdr = dbp->b_addr; 2046 struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
2047 struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr);
1820 2048
1821 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); 2049 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
1822 ASSERT(be16_to_cpu(hdr->bestfree[0].length) == 2050 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1823 mp->m_dirblksize - (uint)sizeof(*hdr)); 2051 ASSERT(be16_to_cpu(bf[0].length) ==
2052 mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr));
1824 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); 2053 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1825} 2054}
1826#endif 2055#endif
@@ -1839,23 +2068,29 @@ xfs_dir2_leaf_trim_data(
1839 bestsp = xfs_dir2_leaf_bests_p(ltp); 2068 bestsp = xfs_dir2_leaf_bests_p(ltp);
1840 be32_add_cpu(&ltp->bestcount, -1); 2069 be32_add_cpu(&ltp->bestcount, -1);
1841 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); 2070 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1842 xfs_dir2_leaf_log_tail(tp, lbp); 2071 xfs_dir3_leaf_log_tail(tp, lbp);
1843 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 2072 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1844 return 0; 2073 return 0;
1845} 2074}
1846 2075
1847static inline size_t 2076static inline size_t
1848xfs_dir2_leaf_size( 2077xfs_dir3_leaf_size(
1849 struct xfs_dir2_leaf_hdr *hdr, 2078 struct xfs_dir3_icleaf_hdr *hdr,
1850 int counts) 2079 int counts)
1851{ 2080{
1852 int entries; 2081 int entries;
2082 int hdrsize;
2083
2084 entries = hdr->count - hdr->stale;
2085 if (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
2086 hdr->magic == XFS_DIR2_LEAFN_MAGIC)
2087 hdrsize = sizeof(struct xfs_dir2_leaf_hdr);
2088 else
2089 hdrsize = sizeof(struct xfs_dir3_leaf_hdr);
1853 2090
1854 entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale); 2091 return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t)
1855 return sizeof(xfs_dir2_leaf_hdr_t) + 2092 + counts * sizeof(xfs_dir2_data_off_t)
1856 entries * sizeof(xfs_dir2_leaf_entry_t) + 2093 + sizeof(xfs_dir2_leaf_tail_t);
1857 counts * sizeof(xfs_dir2_data_off_t) +
1858 sizeof(xfs_dir2_leaf_tail_t);
1859} 2094}
1860 2095
1861/* 2096/*
@@ -1879,6 +2114,8 @@ xfs_dir2_node_to_leaf(
1879 xfs_mount_t *mp; /* filesystem mount point */ 2114 xfs_mount_t *mp; /* filesystem mount point */
1880 int rval; /* successful free trim? */ 2115 int rval; /* successful free trim? */
1881 xfs_trans_t *tp; /* transaction pointer */ 2116 xfs_trans_t *tp; /* transaction pointer */
2117 struct xfs_dir3_icleaf_hdr leafhdr;
2118 struct xfs_dir3_icfree_hdr freehdr;
1882 2119
1883 /* 2120 /*
1884 * There's more than a leaf level in the btree, so there must 2121 * There's more than a leaf level in the btree, so there must
@@ -1928,7 +2165,11 @@ xfs_dir2_node_to_leaf(
1928 return 0; 2165 return 0;
1929 lbp = state->path.blk[0].bp; 2166 lbp = state->path.blk[0].bp;
1930 leaf = lbp->b_addr; 2167 leaf = lbp->b_addr;
1931 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 2168 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
2169
2170 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2171 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
2172
1932 /* 2173 /*
1933 * Read the freespace block. 2174 * Read the freespace block.
1934 */ 2175 */
@@ -1936,44 +2177,49 @@ xfs_dir2_node_to_leaf(
1936 if (error) 2177 if (error)
1937 return error; 2178 return error;
1938 free = fbp->b_addr; 2179 free = fbp->b_addr;
1939 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 2180 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1940 ASSERT(!free->hdr.firstdb); 2181
2182 ASSERT(!freehdr.firstdb);
1941 2183
1942 /* 2184 /*
1943 * Now see if the leafn and free data will fit in a leaf1. 2185 * Now see if the leafn and free data will fit in a leaf1.
1944 * If not, release the buffer and give up. 2186 * If not, release the buffer and give up.
1945 */ 2187 */
1946 if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > 2188 if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) {
1947 mp->m_dirblksize) {
1948 xfs_trans_brelse(tp, fbp); 2189 xfs_trans_brelse(tp, fbp);
1949 return 0; 2190 return 0;
1950 } 2191 }
1951 2192
1952 /* 2193 /*
1953 * If the leaf has any stale entries in it, compress them out. 2194 * If the leaf has any stale entries in it, compress them out.
1954 * The compact routine will log the header.
1955 */ 2195 */
1956 if (be16_to_cpu(leaf->hdr.stale)) 2196 if (leafhdr.stale)
1957 xfs_dir2_leaf_compact(args, lbp); 2197 xfs_dir3_leaf_compact(args, &leafhdr, lbp);
1958 else
1959 xfs_dir2_leaf_log_header(tp, lbp);
1960 2198
1961 lbp->b_ops = &xfs_dir2_leaf1_buf_ops; 2199 lbp->b_ops = &xfs_dir3_leaf1_buf_ops;
1962 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC); 2200 xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF);
2201 leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC)
2202 ? XFS_DIR2_LEAF1_MAGIC
2203 : XFS_DIR3_LEAF1_MAGIC;
1963 2204
1964 /* 2205 /*
1965 * Set up the leaf tail from the freespace block. 2206 * Set up the leaf tail from the freespace block.
1966 */ 2207 */
1967 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 2208 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1968 ltp->bestcount = free->hdr.nvalid; 2209 ltp->bestcount = cpu_to_be32(freehdr.nvalid);
2210
1969 /* 2211 /*
1970 * Set up the leaf bests table. 2212 * Set up the leaf bests table.
1971 */ 2213 */
1972 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, 2214 memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free),
1973 be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t)); 2215 freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
1974 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 2216
1975 xfs_dir2_leaf_log_tail(tp, lbp); 2217 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
1976 xfs_dir2_leaf_check(dp, lbp); 2218 xfs_dir3_leaf_log_header(tp, lbp);
2219 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
2220 xfs_dir3_leaf_log_tail(tp, lbp);
2221 xfs_dir3_leaf_check(mp, lbp);
2222
1977 /* 2223 /*
1978 * Get rid of the freespace block. 2224 * Get rid of the freespace block.
1979 */ 2225 */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5980f9b7fa9b..ecc6c661064c 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -32,20 +33,14 @@
32#include "xfs_dir2_priv.h" 33#include "xfs_dir2_priv.h"
33#include "xfs_error.h" 34#include "xfs_error.h"
34#include "xfs_trace.h" 35#include "xfs_trace.h"
36#include "xfs_buf_item.h"
37#include "xfs_cksum.h"
35 38
36/* 39/*
37 * Function declarations. 40 * Function declarations.
38 */ 41 */
39static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, 42static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
40 int index); 43 int index);
41#ifdef DEBUG
42static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp);
43#else
44#define xfs_dir2_leafn_check(dp, bp)
45#endif
46static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s,
47 int start_s, struct xfs_buf *bp_d,
48 int start_d, int count);
49static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, 44static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
50 xfs_da_state_blk_t *blk1, 45 xfs_da_state_blk_t *blk1,
51 xfs_da_state_blk_t *blk2); 46 xfs_da_state_blk_t *blk2);
@@ -55,52 +50,126 @@ static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
55static int xfs_dir2_node_addname_int(xfs_da_args_t *args, 50static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
56 xfs_da_state_blk_t *fblk); 51 xfs_da_state_blk_t *fblk);
57 52
58static void 53/*
59xfs_dir2_free_verify( 54 * Check internal consistency of a leafn block.
55 */
56#ifdef DEBUG
57#define xfs_dir3_leaf_check(mp, bp) \
58do { \
59 if (!xfs_dir3_leafn_check((mp), (bp))) \
60 ASSERT(0); \
61} while (0);
62
63static bool
64xfs_dir3_leafn_check(
65 struct xfs_mount *mp,
66 struct xfs_buf *bp)
67{
68 struct xfs_dir2_leaf *leaf = bp->b_addr;
69 struct xfs_dir3_icleaf_hdr leafhdr;
70
71 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
72
73 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
74 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
75 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
76 return false;
77 } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
78 return false;
79
80 return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
81}
82#else
83#define xfs_dir3_leaf_check(mp, bp)
84#endif
85
86static bool
87xfs_dir3_free_verify(
60 struct xfs_buf *bp) 88 struct xfs_buf *bp)
61{ 89{
62 struct xfs_mount *mp = bp->b_target->bt_mount; 90 struct xfs_mount *mp = bp->b_target->bt_mount;
63 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 91 struct xfs_dir2_free_hdr *hdr = bp->b_addr;
64 int block_ok = 0;
65 92
66 block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC); 93 if (xfs_sb_version_hascrc(&mp->m_sb)) {
67 if (!block_ok) { 94 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
68 XFS_CORRUPTION_ERROR("xfs_dir2_free_verify magic", 95
69 XFS_ERRLEVEL_LOW, mp, hdr); 96 if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
70 xfs_buf_ioerror(bp, EFSCORRUPTED); 97 return false;
98 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
99 return false;
100 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
101 return false;
102 } else {
103 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
104 return false;
71 } 105 }
106
107 /* XXX: should bounds check the xfs_dir3_icfree_hdr here */
108
109 return true;
72} 110}
73 111
74static void 112static void
75xfs_dir2_free_read_verify( 113xfs_dir3_free_read_verify(
76 struct xfs_buf *bp) 114 struct xfs_buf *bp)
77{ 115{
78 xfs_dir2_free_verify(bp); 116 struct xfs_mount *mp = bp->b_target->bt_mount;
117
118 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
119 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
120 XFS_DIR3_FREE_CRC_OFF)) ||
121 !xfs_dir3_free_verify(bp)) {
122 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
123 xfs_buf_ioerror(bp, EFSCORRUPTED);
124 }
79} 125}
80 126
81static void 127static void
82xfs_dir2_free_write_verify( 128xfs_dir3_free_write_verify(
83 struct xfs_buf *bp) 129 struct xfs_buf *bp)
84{ 130{
85 xfs_dir2_free_verify(bp); 131 struct xfs_mount *mp = bp->b_target->bt_mount;
132 struct xfs_buf_log_item *bip = bp->b_fspriv;
133 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
134
135 if (!xfs_dir3_free_verify(bp)) {
136 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
137 xfs_buf_ioerror(bp, EFSCORRUPTED);
138 return;
139 }
140
141 if (!xfs_sb_version_hascrc(&mp->m_sb))
142 return;
143
144 if (bip)
145 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
146
147 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF);
86} 148}
87 149
88static const struct xfs_buf_ops xfs_dir2_free_buf_ops = { 150const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
89 .verify_read = xfs_dir2_free_read_verify, 151 .verify_read = xfs_dir3_free_read_verify,
90 .verify_write = xfs_dir2_free_write_verify, 152 .verify_write = xfs_dir3_free_write_verify,
91}; 153};
92 154
93 155
94static int 156static int
95__xfs_dir2_free_read( 157__xfs_dir3_free_read(
96 struct xfs_trans *tp, 158 struct xfs_trans *tp,
97 struct xfs_inode *dp, 159 struct xfs_inode *dp,
98 xfs_dablk_t fbno, 160 xfs_dablk_t fbno,
99 xfs_daddr_t mappedbno, 161 xfs_daddr_t mappedbno,
100 struct xfs_buf **bpp) 162 struct xfs_buf **bpp)
101{ 163{
102 return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 164 int err;
103 XFS_DATA_FORK, &xfs_dir2_free_buf_ops); 165
166 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
167 XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
168
169 /* try read returns without an error or *bpp if it lands in a hole */
170 if (!err && tp && *bpp)
171 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
172 return err;
104} 173}
105 174
106int 175int
@@ -110,7 +179,7 @@ xfs_dir2_free_read(
110 xfs_dablk_t fbno, 179 xfs_dablk_t fbno,
111 struct xfs_buf **bpp) 180 struct xfs_buf **bpp)
112{ 181{
113 return __xfs_dir2_free_read(tp, dp, fbno, -1, bpp); 182 return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp);
114} 183}
115 184
116static int 185static int
@@ -120,7 +189,95 @@ xfs_dir2_free_try_read(
120 xfs_dablk_t fbno, 189 xfs_dablk_t fbno,
121 struct xfs_buf **bpp) 190 struct xfs_buf **bpp)
122{ 191{
123 return __xfs_dir2_free_read(tp, dp, fbno, -2, bpp); 192 return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);
193}
194
195
196void
197xfs_dir3_free_hdr_from_disk(
198 struct xfs_dir3_icfree_hdr *to,
199 struct xfs_dir2_free *from)
200{
201 if (from->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)) {
202 to->magic = be32_to_cpu(from->hdr.magic);
203 to->firstdb = be32_to_cpu(from->hdr.firstdb);
204 to->nvalid = be32_to_cpu(from->hdr.nvalid);
205 to->nused = be32_to_cpu(from->hdr.nused);
206 } else {
207 struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
208
209 to->magic = be32_to_cpu(hdr3->hdr.magic);
210 to->firstdb = be32_to_cpu(hdr3->firstdb);
211 to->nvalid = be32_to_cpu(hdr3->nvalid);
212 to->nused = be32_to_cpu(hdr3->nused);
213 }
214
215 ASSERT(to->magic == XFS_DIR2_FREE_MAGIC ||
216 to->magic == XFS_DIR3_FREE_MAGIC);
217}
218
219static void
220xfs_dir3_free_hdr_to_disk(
221 struct xfs_dir2_free *to,
222 struct xfs_dir3_icfree_hdr *from)
223{
224 ASSERT(from->magic == XFS_DIR2_FREE_MAGIC ||
225 from->magic == XFS_DIR3_FREE_MAGIC);
226
227 if (from->magic == XFS_DIR2_FREE_MAGIC) {
228 to->hdr.magic = cpu_to_be32(from->magic);
229 to->hdr.firstdb = cpu_to_be32(from->firstdb);
230 to->hdr.nvalid = cpu_to_be32(from->nvalid);
231 to->hdr.nused = cpu_to_be32(from->nused);
232 } else {
233 struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
234
235 hdr3->hdr.magic = cpu_to_be32(from->magic);
236 hdr3->firstdb = cpu_to_be32(from->firstdb);
237 hdr3->nvalid = cpu_to_be32(from->nvalid);
238 hdr3->nused = cpu_to_be32(from->nused);
239 }
240}
241
242static int
243xfs_dir3_free_get_buf(
244 struct xfs_trans *tp,
245 struct xfs_inode *dp,
246 xfs_dir2_db_t fbno,
247 struct xfs_buf **bpp)
248{
249 struct xfs_mount *mp = dp->i_mount;
250 struct xfs_buf *bp;
251 int error;
252 struct xfs_dir3_icfree_hdr hdr;
253
254 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno),
255 -1, &bp, XFS_DATA_FORK);
256 if (error)
257 return error;
258
259 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF);
260 bp->b_ops = &xfs_dir3_free_buf_ops;
261
262 /*
263 * Initialize the new block to be empty, and remember
264 * its first slot as our empty slot.
265 */
266 hdr.magic = XFS_DIR2_FREE_MAGIC;
267 hdr.firstdb = 0;
268 hdr.nused = 0;
269 hdr.nvalid = 0;
270 if (xfs_sb_version_hascrc(&mp->m_sb)) {
271 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
272
273 hdr.magic = XFS_DIR3_FREE_MAGIC;
274 hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
275 hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
276 uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
277 }
278 xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
279 *bpp = bp;
280 return 0;
124} 281}
125 282
126/* 283/*
@@ -134,13 +291,16 @@ xfs_dir2_free_log_bests(
134 int last) /* last entry to log */ 291 int last) /* last entry to log */
135{ 292{
136 xfs_dir2_free_t *free; /* freespace structure */ 293 xfs_dir2_free_t *free; /* freespace structure */
294 __be16 *bests;
137 295
138 free = bp->b_addr; 296 free = bp->b_addr;
139 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 297 bests = xfs_dir3_free_bests_p(tp->t_mountp, free);
298 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
299 free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
140 xfs_trans_log_buf(tp, bp, 300 xfs_trans_log_buf(tp, bp,
141 (uint)((char *)&free->bests[first] - (char *)free), 301 (uint)((char *)&bests[first] - (char *)free),
142 (uint)((char *)&free->bests[last] - (char *)free + 302 (uint)((char *)&bests[last] - (char *)free +
143 sizeof(free->bests[0]) - 1)); 303 sizeof(bests[0]) - 1));
144} 304}
145 305
146/* 306/*
@@ -154,9 +314,9 @@ xfs_dir2_free_log_header(
154 xfs_dir2_free_t *free; /* freespace structure */ 314 xfs_dir2_free_t *free; /* freespace structure */
155 315
156 free = bp->b_addr; 316 free = bp->b_addr;
157 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 317 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
158 xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), 318 free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
159 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); 319 xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1);
160} 320}
161 321
162/* 322/*
@@ -183,6 +343,7 @@ xfs_dir2_leaf_to_node(
183 xfs_dir2_data_off_t off; /* freespace entry value */ 343 xfs_dir2_data_off_t off; /* freespace entry value */
184 __be16 *to; /* pointer to freespace entry */ 344 __be16 *to; /* pointer to freespace entry */
185 xfs_trans_t *tp; /* transaction pointer */ 345 xfs_trans_t *tp; /* transaction pointer */
346 struct xfs_dir3_icfree_hdr freehdr;
186 347
187 trace_xfs_dir2_leaf_to_node(args); 348 trace_xfs_dir2_leaf_to_node(args);
188 349
@@ -199,44 +360,53 @@ xfs_dir2_leaf_to_node(
199 /* 360 /*
200 * Get the buffer for the new freespace block. 361 * Get the buffer for the new freespace block.
201 */ 362 */
202 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, 363 error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp);
203 XFS_DATA_FORK);
204 if (error) 364 if (error)
205 return error; 365 return error;
206 fbp->b_ops = &xfs_dir2_free_buf_ops;
207 366
208 free = fbp->b_addr; 367 free = fbp->b_addr;
368 xfs_dir3_free_hdr_from_disk(&freehdr, free);
209 leaf = lbp->b_addr; 369 leaf = lbp->b_addr;
210 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 370 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
211 /* 371 ASSERT(be32_to_cpu(ltp->bestcount) <=
212 * Initialize the freespace block header. 372 (uint)dp->i_d.di_size / mp->m_dirblksize);
213 */ 373
214 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
215 free->hdr.firstdb = 0;
216 ASSERT(be32_to_cpu(ltp->bestcount) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
217 free->hdr.nvalid = ltp->bestcount;
218 /* 374 /*
219 * Copy freespace entries from the leaf block to the new block. 375 * Copy freespace entries from the leaf block to the new block.
220 * Count active entries. 376 * Count active entries.
221 */ 377 */
222 for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests; 378 from = xfs_dir2_leaf_bests_p(ltp);
223 i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { 379 to = xfs_dir3_free_bests_p(mp, free);
380 for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
224 if ((off = be16_to_cpu(*from)) != NULLDATAOFF) 381 if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
225 n++; 382 n++;
226 *to = cpu_to_be16(off); 383 *to = cpu_to_be16(off);
227 } 384 }
228 free->hdr.nused = cpu_to_be32(n);
229
230 lbp->b_ops = &xfs_dir2_leafn_buf_ops;
231 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
232 385
233 /* 386 /*
234 * Log everything. 387 * Now initialize the freespace block header.
235 */ 388 */
236 xfs_dir2_leaf_log_header(tp, lbp); 389 freehdr.nused = n;
390 freehdr.nvalid = be32_to_cpu(ltp->bestcount);
391
392 xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr);
393 xfs_dir2_free_log_bests(tp, fbp, 0, freehdr.nvalid - 1);
237 xfs_dir2_free_log_header(tp, fbp); 394 xfs_dir2_free_log_header(tp, fbp);
238 xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1); 395
239 xfs_dir2_leafn_check(dp, lbp); 396 /*
397 * Converting the leaf to a leafnode is just a matter of changing the
398 * magic number and the ops. Do the change directly to the buffer as
399 * it's less work (and less code) than decoding the header to host
400 * format and back again.
401 */
402 if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC))
403 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
404 else
405 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
406 lbp->b_ops = &xfs_dir3_leafn_buf_ops;
407 xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
408 xfs_dir3_leaf_log_header(tp, lbp);
409 xfs_dir3_leaf_check(mp, lbp);
240 return 0; 410 return 0;
241} 411}
242 412
@@ -260,6 +430,8 @@ xfs_dir2_leafn_add(
260 int lowstale; /* previous stale entry */ 430 int lowstale; /* previous stale entry */
261 xfs_mount_t *mp; /* filesystem mount point */ 431 xfs_mount_t *mp; /* filesystem mount point */
262 xfs_trans_t *tp; /* transaction pointer */ 432 xfs_trans_t *tp; /* transaction pointer */
433 struct xfs_dir3_icleaf_hdr leafhdr;
434 struct xfs_dir2_leaf_entry *ents;
263 435
264 trace_xfs_dir2_leafn_add(args, index); 436 trace_xfs_dir2_leafn_add(args, index);
265 437
@@ -267,6 +439,8 @@ xfs_dir2_leafn_add(
267 mp = dp->i_mount; 439 mp = dp->i_mount;
268 tp = args->trans; 440 tp = args->trans;
269 leaf = bp->b_addr; 441 leaf = bp->b_addr;
442 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
443 ents = xfs_dir3_leaf_ents_p(leaf);
270 444
271 /* 445 /*
272 * Quick check just to make sure we are not going to index 446 * Quick check just to make sure we are not going to index
@@ -282,15 +456,15 @@ xfs_dir2_leafn_add(
282 * a compact. 456 * a compact.
283 */ 457 */
284 458
285 if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) { 459 if (leafhdr.count == xfs_dir3_max_leaf_ents(mp, leaf)) {
286 if (!leaf->hdr.stale) 460 if (!leafhdr.stale)
287 return XFS_ERROR(ENOSPC); 461 return XFS_ERROR(ENOSPC);
288 compact = be16_to_cpu(leaf->hdr.stale) > 1; 462 compact = leafhdr.stale > 1;
289 } else 463 } else
290 compact = 0; 464 compact = 0;
291 ASSERT(index == 0 || be32_to_cpu(leaf->ents[index - 1].hashval) <= args->hashval); 465 ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval);
292 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 466 ASSERT(index == leafhdr.count ||
293 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); 467 be32_to_cpu(ents[index].hashval) >= args->hashval);
294 468
295 if (args->op_flags & XFS_DA_OP_JUSTCHECK) 469 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
296 return 0; 470 return 0;
@@ -299,61 +473,51 @@ xfs_dir2_leafn_add(
299 * Compact out all but one stale leaf entry. Leaves behind 473 * Compact out all but one stale leaf entry. Leaves behind
300 * the entry closest to index. 474 * the entry closest to index.
301 */ 475 */
302 if (compact) { 476 if (compact)
303 xfs_dir2_leaf_compact_x1(bp, &index, &lowstale, &highstale, 477 xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
304 &lfloglow, &lfloghigh); 478 &highstale, &lfloglow, &lfloghigh);
305 } 479 else if (leafhdr.stale) {
306 /* 480 /*
307 * Set impossible logging indices for this case. 481 * Set impossible logging indices for this case.
308 */ 482 */
309 else if (leaf->hdr.stale) { 483 lfloglow = leafhdr.count;
310 lfloglow = be16_to_cpu(leaf->hdr.count);
311 lfloghigh = -1; 484 lfloghigh = -1;
312 } 485 }
313 486
314 /* 487 /*
315 * Insert the new entry, log everything. 488 * Insert the new entry, log everything.
316 */ 489 */
317 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, 490 lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
318 highstale, &lfloglow, &lfloghigh); 491 highstale, &lfloglow, &lfloghigh);
319 492
320 lep->hashval = cpu_to_be32(args->hashval); 493 lep->hashval = cpu_to_be32(args->hashval);
321 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, 494 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
322 args->blkno, args->index)); 495 args->blkno, args->index));
323 xfs_dir2_leaf_log_header(tp, bp); 496
324 xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh); 497 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
325 xfs_dir2_leafn_check(dp, bp); 498 xfs_dir3_leaf_log_header(tp, bp);
499 xfs_dir3_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
500 xfs_dir3_leaf_check(mp, bp);
326 return 0; 501 return 0;
327} 502}
328 503
329#ifdef DEBUG 504#ifdef DEBUG
330/* 505static void
331 * Check internal consistency of a leafn block. 506xfs_dir2_free_hdr_check(
332 */ 507 struct xfs_mount *mp,
333void 508 struct xfs_buf *bp,
334xfs_dir2_leafn_check( 509 xfs_dir2_db_t db)
335 struct xfs_inode *dp,
336 struct xfs_buf *bp)
337{ 510{
338 int i; /* leaf index */ 511 struct xfs_dir3_icfree_hdr hdr;
339 xfs_dir2_leaf_t *leaf; /* leaf structure */
340 xfs_mount_t *mp; /* filesystem mount point */
341 int stale; /* count of stale leaves */
342 512
343 leaf = bp->b_addr; 513 xfs_dir3_free_hdr_from_disk(&hdr, bp->b_addr);
344 mp = dp->i_mount; 514
345 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 515 ASSERT((hdr.firstdb % xfs_dir3_free_max_bests(mp)) == 0);
346 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); 516 ASSERT(hdr.firstdb <= db);
347 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { 517 ASSERT(db < hdr.firstdb + hdr.nvalid);
348 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
349 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
350 be32_to_cpu(leaf->ents[i + 1].hashval));
351 }
352 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
353 stale++;
354 }
355 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
356} 518}
519#else
520#define xfs_dir2_free_hdr_check(mp, dp, db)
357#endif /* DEBUG */ 521#endif /* DEBUG */
358 522
359/* 523/*
@@ -365,15 +529,22 @@ xfs_dir2_leafn_lasthash(
365 struct xfs_buf *bp, /* leaf buffer */ 529 struct xfs_buf *bp, /* leaf buffer */
366 int *count) /* count of entries in leaf */ 530 int *count) /* count of entries in leaf */
367{ 531{
368 xfs_dir2_leaf_t *leaf; /* leaf structure */ 532 struct xfs_dir2_leaf *leaf = bp->b_addr;
533 struct xfs_dir2_leaf_entry *ents;
534 struct xfs_dir3_icleaf_hdr leafhdr;
535
536 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
537
538 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
539 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
369 540
370 leaf = bp->b_addr;
371 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
372 if (count) 541 if (count)
373 *count = be16_to_cpu(leaf->hdr.count); 542 *count = leafhdr.count;
374 if (!leaf->hdr.count) 543 if (!leafhdr.count)
375 return 0; 544 return 0;
376 return be32_to_cpu(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval); 545
546 ents = xfs_dir3_leaf_ents_p(leaf);
547 return be32_to_cpu(ents[leafhdr.count - 1].hashval);
377} 548}
378 549
379/* 550/*
@@ -402,16 +573,19 @@ xfs_dir2_leafn_lookup_for_addname(
402 xfs_dir2_db_t newdb; /* new data block number */ 573 xfs_dir2_db_t newdb; /* new data block number */
403 xfs_dir2_db_t newfdb; /* new free block number */ 574 xfs_dir2_db_t newfdb; /* new free block number */
404 xfs_trans_t *tp; /* transaction pointer */ 575 xfs_trans_t *tp; /* transaction pointer */
576 struct xfs_dir2_leaf_entry *ents;
577 struct xfs_dir3_icleaf_hdr leafhdr;
405 578
406 dp = args->dp; 579 dp = args->dp;
407 tp = args->trans; 580 tp = args->trans;
408 mp = dp->i_mount; 581 mp = dp->i_mount;
409 leaf = bp->b_addr; 582 leaf = bp->b_addr;
410 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 583 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
411#ifdef __KERNEL__ 584 ents = xfs_dir3_leaf_ents_p(leaf);
412 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 585
413#endif 586 xfs_dir3_leaf_check(mp, bp);
414 xfs_dir2_leafn_check(dp, bp); 587 ASSERT(leafhdr.count > 0);
588
415 /* 589 /*
416 * Look up the hash value in the leaf entries. 590 * Look up the hash value in the leaf entries.
417 */ 591 */
@@ -424,15 +598,16 @@ xfs_dir2_leafn_lookup_for_addname(
424 curbp = state->extrablk.bp; 598 curbp = state->extrablk.bp;
425 curfdb = state->extrablk.blkno; 599 curfdb = state->extrablk.blkno;
426 free = curbp->b_addr; 600 free = curbp->b_addr;
427 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 601 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
602 free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
428 } 603 }
429 length = xfs_dir2_data_entsize(args->namelen); 604 length = xfs_dir2_data_entsize(args->namelen);
430 /* 605 /*
431 * Loop over leaf entries with the right hash value. 606 * Loop over leaf entries with the right hash value.
432 */ 607 */
433 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && 608 for (lep = &ents[index];
434 be32_to_cpu(lep->hashval) == args->hashval; 609 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
435 lep++, index++) { 610 lep++, index++) {
436 /* 611 /*
437 * Skip stale leaf entries. 612 * Skip stale leaf entries.
438 */ 613 */
@@ -451,6 +626,8 @@ xfs_dir2_leafn_lookup_for_addname(
451 * in hand, take a look at it. 626 * in hand, take a look at it.
452 */ 627 */
453 if (newdb != curdb) { 628 if (newdb != curdb) {
629 __be16 *bests;
630
454 curdb = newdb; 631 curdb = newdb;
455 /* 632 /*
456 * Convert the data block to the free block 633 * Convert the data block to the free block
@@ -473,13 +650,8 @@ xfs_dir2_leafn_lookup_for_addname(
473 if (error) 650 if (error)
474 return error; 651 return error;
475 free = curbp->b_addr; 652 free = curbp->b_addr;
476 ASSERT(be32_to_cpu(free->hdr.magic) == 653
477 XFS_DIR2_FREE_MAGIC); 654 xfs_dir2_free_hdr_check(mp, curbp, curdb);
478 ASSERT((be32_to_cpu(free->hdr.firstdb) %
479 xfs_dir2_free_max_bests(mp)) == 0);
480 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
481 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
482 be32_to_cpu(free->hdr.nvalid));
483 } 655 }
484 /* 656 /*
485 * Get the index for our entry. 657 * Get the index for our entry.
@@ -488,8 +660,8 @@ xfs_dir2_leafn_lookup_for_addname(
488 /* 660 /*
489 * If it has room, return it. 661 * If it has room, return it.
490 */ 662 */
491 if (unlikely(free->bests[fi] == 663 bests = xfs_dir3_free_bests_p(mp, free);
492 cpu_to_be16(NULLDATAOFF))) { 664 if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {
493 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", 665 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
494 XFS_ERRLEVEL_LOW, mp); 666 XFS_ERRLEVEL_LOW, mp);
495 if (curfdb != newfdb) 667 if (curfdb != newfdb)
@@ -497,7 +669,7 @@ xfs_dir2_leafn_lookup_for_addname(
497 return XFS_ERROR(EFSCORRUPTED); 669 return XFS_ERROR(EFSCORRUPTED);
498 } 670 }
499 curfdb = newfdb; 671 curfdb = newfdb;
500 if (be16_to_cpu(free->bests[fi]) >= length) 672 if (be16_to_cpu(bests[fi]) >= length)
501 goto out; 673 goto out;
502 } 674 }
503 } 675 }
@@ -511,6 +683,12 @@ out:
511 state->extrablk.bp = curbp; 683 state->extrablk.bp = curbp;
512 state->extrablk.index = fi; 684 state->extrablk.index = fi;
513 state->extrablk.blkno = curfdb; 685 state->extrablk.blkno = curfdb;
686
687 /*
688 * Important: this magic number is not in the buffer - it's for
689 * buffer type information and therefore only the free/data type
690 * matters here, not whether CRCs are enabled or not.
691 */
514 state->extrablk.magic = XFS_DIR2_FREE_MAGIC; 692 state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
515 } else { 693 } else {
516 state->extravalid = 0; 694 state->extravalid = 0;
@@ -545,16 +723,19 @@ xfs_dir2_leafn_lookup_for_entry(
545 xfs_dir2_db_t newdb; /* new data block number */ 723 xfs_dir2_db_t newdb; /* new data block number */
546 xfs_trans_t *tp; /* transaction pointer */ 724 xfs_trans_t *tp; /* transaction pointer */
547 enum xfs_dacmp cmp; /* comparison result */ 725 enum xfs_dacmp cmp; /* comparison result */
726 struct xfs_dir2_leaf_entry *ents;
727 struct xfs_dir3_icleaf_hdr leafhdr;
548 728
549 dp = args->dp; 729 dp = args->dp;
550 tp = args->trans; 730 tp = args->trans;
551 mp = dp->i_mount; 731 mp = dp->i_mount;
552 leaf = bp->b_addr; 732 leaf = bp->b_addr;
553 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 733 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
554#ifdef __KERNEL__ 734 ents = xfs_dir3_leaf_ents_p(leaf);
555 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 735
556#endif 736 xfs_dir3_leaf_check(mp, bp);
557 xfs_dir2_leafn_check(dp, bp); 737 ASSERT(leafhdr.count > 0);
738
558 /* 739 /*
559 * Look up the hash value in the leaf entries. 740 * Look up the hash value in the leaf entries.
560 */ 741 */
@@ -569,9 +750,9 @@ xfs_dir2_leafn_lookup_for_entry(
569 /* 750 /*
570 * Loop over leaf entries with the right hash value. 751 * Loop over leaf entries with the right hash value.
571 */ 752 */
572 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && 753 for (lep = &ents[index];
573 be32_to_cpu(lep->hashval) == args->hashval; 754 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
574 lep++, index++) { 755 lep++, index++) {
575 /* 756 /*
576 * Skip stale leaf entries. 757 * Skip stale leaf entries.
577 */ 758 */
@@ -604,13 +785,13 @@ xfs_dir2_leafn_lookup_for_entry(
604 ASSERT(state->extravalid); 785 ASSERT(state->extravalid);
605 curbp = state->extrablk.bp; 786 curbp = state->extrablk.bp;
606 } else { 787 } else {
607 error = xfs_dir2_data_read(tp, dp, 788 error = xfs_dir3_data_read(tp, dp,
608 xfs_dir2_db_to_da(mp, newdb), 789 xfs_dir2_db_to_da(mp, newdb),
609 -1, &curbp); 790 -1, &curbp);
610 if (error) 791 if (error)
611 return error; 792 return error;
612 } 793 }
613 xfs_dir2_data_check(dp, curbp); 794 xfs_dir3_data_check(dp, curbp);
614 curdb = newdb; 795 curdb = newdb;
615 } 796 }
616 /* 797 /*
@@ -638,13 +819,13 @@ xfs_dir2_leafn_lookup_for_entry(
638 state->extrablk.index = (int)((char *)dep - 819 state->extrablk.index = (int)((char *)dep -
639 (char *)curbp->b_addr); 820 (char *)curbp->b_addr);
640 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 821 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
641 curbp->b_ops = &xfs_dir2_data_buf_ops; 822 curbp->b_ops = &xfs_dir3_data_buf_ops;
823 xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
642 if (cmp == XFS_CMP_EXACT) 824 if (cmp == XFS_CMP_EXACT)
643 return XFS_ERROR(EEXIST); 825 return XFS_ERROR(EEXIST);
644 } 826 }
645 } 827 }
646 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 828 ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
647 (args->op_flags & XFS_DA_OP_OKNOENT));
648 if (curbp) { 829 if (curbp) {
649 if (args->cmpresult == XFS_CMP_DIFFERENT) { 830 if (args->cmpresult == XFS_CMP_DIFFERENT) {
650 /* Giving back last used data block. */ 831 /* Giving back last used data block. */
@@ -653,7 +834,8 @@ xfs_dir2_leafn_lookup_for_entry(
653 state->extrablk.index = -1; 834 state->extrablk.index = -1;
654 state->extrablk.blkno = curdb; 835 state->extrablk.blkno = curdb;
655 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 836 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
656 curbp->b_ops = &xfs_dir2_data_buf_ops; 837 curbp->b_ops = &xfs_dir3_data_buf_ops;
838 xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
657 } else { 839 } else {
658 /* If the curbp is not the CI match block, drop it */ 840 /* If the curbp is not the CI match block, drop it */
659 if (state->extrablk.bp != curbp) 841 if (state->extrablk.bp != curbp)
@@ -689,52 +871,50 @@ xfs_dir2_leafn_lookup_int(
689 * Log entries and headers. Stale entries are preserved. 871 * Log entries and headers. Stale entries are preserved.
690 */ 872 */
691static void 873static void
692xfs_dir2_leafn_moveents( 874xfs_dir3_leafn_moveents(
693 xfs_da_args_t *args, /* operation arguments */ 875 xfs_da_args_t *args, /* operation arguments */
694 struct xfs_buf *bp_s, /* source leaf buffer */ 876 struct xfs_buf *bp_s, /* source */
695 int start_s, /* source leaf index */ 877 struct xfs_dir3_icleaf_hdr *shdr,
696 struct xfs_buf *bp_d, /* destination leaf buffer */ 878 struct xfs_dir2_leaf_entry *sents,
697 int start_d, /* destination leaf index */ 879 int start_s,/* source leaf index */
698 int count) /* count of leaves to copy */ 880 struct xfs_buf *bp_d, /* destination */
881 struct xfs_dir3_icleaf_hdr *dhdr,
882 struct xfs_dir2_leaf_entry *dents,
883 int start_d,/* destination leaf index */
884 int count) /* count of leaves to copy */
699{ 885{
700 xfs_dir2_leaf_t *leaf_d; /* destination leaf structure */ 886 struct xfs_trans *tp = args->trans;
701 xfs_dir2_leaf_t *leaf_s; /* source leaf structure */ 887 int stale; /* count stale leaves copied */
702 int stale; /* count stale leaves copied */
703 xfs_trans_t *tp; /* transaction pointer */
704 888
705 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); 889 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
706 890
707 /* 891 /*
708 * Silently return if nothing to do. 892 * Silently return if nothing to do.
709 */ 893 */
710 if (count == 0) { 894 if (count == 0)
711 return; 895 return;
712 } 896
713 tp = args->trans;
714 leaf_s = bp_s->b_addr;
715 leaf_d = bp_d->b_addr;
716 /* 897 /*
717 * If the destination index is not the end of the current 898 * If the destination index is not the end of the current
718 * destination leaf entries, open up a hole in the destination 899 * destination leaf entries, open up a hole in the destination
719 * to hold the new entries. 900 * to hold the new entries.
720 */ 901 */
721 if (start_d < be16_to_cpu(leaf_d->hdr.count)) { 902 if (start_d < dhdr->count) {
722 memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d], 903 memmove(&dents[start_d + count], &dents[start_d],
723 (be16_to_cpu(leaf_d->hdr.count) - start_d) * 904 (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
724 sizeof(xfs_dir2_leaf_entry_t)); 905 xfs_dir3_leaf_log_ents(tp, bp_d, start_d + count,
725 xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count, 906 count + dhdr->count - 1);
726 count + be16_to_cpu(leaf_d->hdr.count) - 1);
727 } 907 }
728 /* 908 /*
729 * If the source has stale leaves, count the ones in the copy range 909 * If the source has stale leaves, count the ones in the copy range
730 * so we can update the header correctly. 910 * so we can update the header correctly.
731 */ 911 */
732 if (leaf_s->hdr.stale) { 912 if (shdr->stale) {
733 int i; /* temp leaf index */ 913 int i; /* temp leaf index */
734 914
735 for (i = start_s, stale = 0; i < start_s + count; i++) { 915 for (i = start_s, stale = 0; i < start_s + count; i++) {
736 if (leaf_s->ents[i].address == 916 if (sents[i].address ==
737 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 917 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
738 stale++; 918 stale++;
739 } 919 }
740 } else 920 } else
@@ -742,29 +922,27 @@ xfs_dir2_leafn_moveents(
742 /* 922 /*
743 * Copy the leaf entries from source to destination. 923 * Copy the leaf entries from source to destination.
744 */ 924 */
745 memcpy(&leaf_d->ents[start_d], &leaf_s->ents[start_s], 925 memcpy(&dents[start_d], &sents[start_s],
746 count * sizeof(xfs_dir2_leaf_entry_t)); 926 count * sizeof(xfs_dir2_leaf_entry_t));
747 xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1); 927 xfs_dir3_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
928
748 /* 929 /*
749 * If there are source entries after the ones we copied, 930 * If there are source entries after the ones we copied,
750 * delete the ones we copied by sliding the next ones down. 931 * delete the ones we copied by sliding the next ones down.
751 */ 932 */
752 if (start_s + count < be16_to_cpu(leaf_s->hdr.count)) { 933 if (start_s + count < shdr->count) {
753 memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count], 934 memmove(&sents[start_s], &sents[start_s + count],
754 count * sizeof(xfs_dir2_leaf_entry_t)); 935 count * sizeof(xfs_dir2_leaf_entry_t));
755 xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1); 936 xfs_dir3_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
756 } 937 }
938
757 /* 939 /*
758 * Update the headers and log them. 940 * Update the headers and log them.
759 */ 941 */
760 be16_add_cpu(&leaf_s->hdr.count, -(count)); 942 shdr->count -= count;
761 be16_add_cpu(&leaf_s->hdr.stale, -(stale)); 943 shdr->stale -= stale;
762 be16_add_cpu(&leaf_d->hdr.count, count); 944 dhdr->count += count;
763 be16_add_cpu(&leaf_d->hdr.stale, stale); 945 dhdr->stale += stale;
764 xfs_dir2_leaf_log_header(tp, bp_s);
765 xfs_dir2_leaf_log_header(tp, bp_d);
766 xfs_dir2_leafn_check(args->dp, bp_s);
767 xfs_dir2_leafn_check(args->dp, bp_d);
768} 946}
769 947
770/* 948/*
@@ -773,21 +951,25 @@ xfs_dir2_leafn_moveents(
773 */ 951 */
774int /* sort order */ 952int /* sort order */
775xfs_dir2_leafn_order( 953xfs_dir2_leafn_order(
776 struct xfs_buf *leaf1_bp, /* leaf1 buffer */ 954 struct xfs_buf *leaf1_bp, /* leaf1 buffer */
777 struct xfs_buf *leaf2_bp) /* leaf2 buffer */ 955 struct xfs_buf *leaf2_bp) /* leaf2 buffer */
778{ 956{
779 xfs_dir2_leaf_t *leaf1; /* leaf1 structure */ 957 struct xfs_dir2_leaf *leaf1 = leaf1_bp->b_addr;
780 xfs_dir2_leaf_t *leaf2; /* leaf2 structure */ 958 struct xfs_dir2_leaf *leaf2 = leaf2_bp->b_addr;
781 959 struct xfs_dir2_leaf_entry *ents1;
782 leaf1 = leaf1_bp->b_addr; 960 struct xfs_dir2_leaf_entry *ents2;
783 leaf2 = leaf2_bp->b_addr; 961 struct xfs_dir3_icleaf_hdr hdr1;
784 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 962 struct xfs_dir3_icleaf_hdr hdr2;
785 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 963
786 if (be16_to_cpu(leaf1->hdr.count) > 0 && 964 xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1);
787 be16_to_cpu(leaf2->hdr.count) > 0 && 965 xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2);
788 (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || 966 ents1 = xfs_dir3_leaf_ents_p(leaf1);
789 be32_to_cpu(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval) < 967 ents2 = xfs_dir3_leaf_ents_p(leaf2);
790 be32_to_cpu(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval))) 968
969 if (hdr1.count > 0 && hdr2.count > 0 &&
970 (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) ||
971 be32_to_cpu(ents2[hdr2.count - 1].hashval) <
972 be32_to_cpu(ents1[hdr1.count - 1].hashval)))
791 return 1; 973 return 1;
792 return 0; 974 return 0;
793} 975}
@@ -816,6 +998,10 @@ xfs_dir2_leafn_rebalance(
816#endif 998#endif
817 int oldsum; /* old total leaf count */ 999 int oldsum; /* old total leaf count */
818 int swap; /* swapped leaf blocks */ 1000 int swap; /* swapped leaf blocks */
1001 struct xfs_dir2_leaf_entry *ents1;
1002 struct xfs_dir2_leaf_entry *ents2;
1003 struct xfs_dir3_icleaf_hdr hdr1;
1004 struct xfs_dir3_icleaf_hdr hdr2;
819 1005
820 args = state->args; 1006 args = state->args;
821 /* 1007 /*
@@ -830,11 +1016,17 @@ xfs_dir2_leafn_rebalance(
830 } 1016 }
831 leaf1 = blk1->bp->b_addr; 1017 leaf1 = blk1->bp->b_addr;
832 leaf2 = blk2->bp->b_addr; 1018 leaf2 = blk2->bp->b_addr;
833 oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count); 1019 xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1);
1020 xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2);
1021 ents1 = xfs_dir3_leaf_ents_p(leaf1);
1022 ents2 = xfs_dir3_leaf_ents_p(leaf2);
1023
1024 oldsum = hdr1.count + hdr2.count;
834#ifdef DEBUG 1025#ifdef DEBUG
835 oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale); 1026 oldstale = hdr1.stale + hdr2.stale;
836#endif 1027#endif
837 mid = oldsum >> 1; 1028 mid = oldsum >> 1;
1029
838 /* 1030 /*
839 * If the old leaf count was odd then the new one will be even, 1031 * If the old leaf count was odd then the new one will be even,
840 * so we need to divide the new count evenly. 1032 * so we need to divide the new count evenly.
@@ -842,10 +1034,10 @@ xfs_dir2_leafn_rebalance(
842 if (oldsum & 1) { 1034 if (oldsum & 1) {
843 xfs_dahash_t midhash; /* middle entry hash value */ 1035 xfs_dahash_t midhash; /* middle entry hash value */
844 1036
845 if (mid >= be16_to_cpu(leaf1->hdr.count)) 1037 if (mid >= hdr1.count)
846 midhash = be32_to_cpu(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval); 1038 midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval);
847 else 1039 else
848 midhash = be32_to_cpu(leaf1->ents[mid].hashval); 1040 midhash = be32_to_cpu(ents1[mid].hashval);
849 isleft = args->hashval <= midhash; 1041 isleft = args->hashval <= midhash;
850 } 1042 }
851 /* 1043 /*
@@ -859,30 +1051,42 @@ xfs_dir2_leafn_rebalance(
859 * Calculate moved entry count. Positive means left-to-right, 1051 * Calculate moved entry count. Positive means left-to-right,
860 * negative means right-to-left. Then move the entries. 1052 * negative means right-to-left. Then move the entries.
861 */ 1053 */
862 count = be16_to_cpu(leaf1->hdr.count) - mid + (isleft == 0); 1054 count = hdr1.count - mid + (isleft == 0);
863 if (count > 0) 1055 if (count > 0)
864 xfs_dir2_leafn_moveents(args, blk1->bp, 1056 xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1,
865 be16_to_cpu(leaf1->hdr.count) - count, blk2->bp, 0, count); 1057 hdr1.count - count, blk2->bp,
1058 &hdr2, ents2, 0, count);
866 else if (count < 0) 1059 else if (count < 0)
867 xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp, 1060 xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0,
868 be16_to_cpu(leaf1->hdr.count), count); 1061 blk1->bp, &hdr1, ents1,
869 ASSERT(be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count) == oldsum); 1062 hdr1.count, count);
870 ASSERT(be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale) == oldstale); 1063
1064 ASSERT(hdr1.count + hdr2.count == oldsum);
1065 ASSERT(hdr1.stale + hdr2.stale == oldstale);
1066
1067 /* log the changes made when moving the entries */
1068 xfs_dir3_leaf_hdr_to_disk(leaf1, &hdr1);
1069 xfs_dir3_leaf_hdr_to_disk(leaf2, &hdr2);
1070 xfs_dir3_leaf_log_header(args->trans, blk1->bp);
1071 xfs_dir3_leaf_log_header(args->trans, blk2->bp);
1072
1073 xfs_dir3_leaf_check(args->dp->i_mount, blk1->bp);
1074 xfs_dir3_leaf_check(args->dp->i_mount, blk2->bp);
1075
871 /* 1076 /*
872 * Mark whether we're inserting into the old or new leaf. 1077 * Mark whether we're inserting into the old or new leaf.
873 */ 1078 */
874 if (be16_to_cpu(leaf1->hdr.count) < be16_to_cpu(leaf2->hdr.count)) 1079 if (hdr1.count < hdr2.count)
875 state->inleaf = swap; 1080 state->inleaf = swap;
876 else if (be16_to_cpu(leaf1->hdr.count) > be16_to_cpu(leaf2->hdr.count)) 1081 else if (hdr1.count > hdr2.count)
877 state->inleaf = !swap; 1082 state->inleaf = !swap;
878 else 1083 else
879 state->inleaf = 1084 state->inleaf = swap ^ (blk1->index <= hdr1.count);
880 swap ^ (blk1->index <= be16_to_cpu(leaf1->hdr.count));
881 /* 1085 /*
882 * Adjust the expected index for insertion. 1086 * Adjust the expected index for insertion.
883 */ 1087 */
884 if (!state->inleaf) 1088 if (!state->inleaf)
885 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 1089 blk2->index = blk1->index - hdr1.count;
886 1090
887 /* 1091 /*
888 * Finally sanity check just to make sure we are not returning a 1092 * Finally sanity check just to make sure we are not returning a
@@ -898,7 +1102,7 @@ xfs_dir2_leafn_rebalance(
898} 1102}
899 1103
900static int 1104static int
901xfs_dir2_data_block_free( 1105xfs_dir3_data_block_free(
902 xfs_da_args_t *args, 1106 xfs_da_args_t *args,
903 struct xfs_dir2_data_hdr *hdr, 1107 struct xfs_dir2_data_hdr *hdr,
904 struct xfs_dir2_free *free, 1108 struct xfs_dir2_free *free,
@@ -909,57 +1113,66 @@ xfs_dir2_data_block_free(
909{ 1113{
910 struct xfs_trans *tp = args->trans; 1114 struct xfs_trans *tp = args->trans;
911 int logfree = 0; 1115 int logfree = 0;
1116 __be16 *bests;
1117 struct xfs_dir3_icfree_hdr freehdr;
912 1118
913 if (!hdr) { 1119 xfs_dir3_free_hdr_from_disk(&freehdr, free);
914 /* One less used entry in the free table. */
915 be32_add_cpu(&free->hdr.nused, -1);
916 xfs_dir2_free_log_header(tp, fbp);
917 1120
1121 bests = xfs_dir3_free_bests_p(tp->t_mountp, free);
1122 if (hdr) {
918 /* 1123 /*
919 * If this was the last entry in the table, we can trim the 1124 * Data block is not empty, just set the free entry to the new
920 * table size back. There might be other entries at the end 1125 * value.
921 * referring to non-existent data blocks, get those too.
922 */ 1126 */
923 if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { 1127 bests[findex] = cpu_to_be16(longest);
924 int i; /* free entry index */ 1128 xfs_dir2_free_log_bests(tp, fbp, findex, findex);
1129 return 0;
1130 }
925 1131
926 for (i = findex - 1; i >= 0; i--) { 1132 /* One less used entry in the free table. */
927 if (free->bests[i] != cpu_to_be16(NULLDATAOFF)) 1133 freehdr.nused--;
928 break;
929 }
930 free->hdr.nvalid = cpu_to_be32(i + 1);
931 logfree = 0;
932 } else {
933 /* Not the last entry, just punch it out. */
934 free->bests[findex] = cpu_to_be16(NULLDATAOFF);
935 logfree = 1;
936 }
937 /*
938 * If there are no useful entries left in the block,
939 * get rid of the block if we can.
940 */
941 if (!free->hdr.nused) {
942 int error;
943 1134
944 error = xfs_dir2_shrink_inode(args, fdb, fbp); 1135 /*
945 if (error == 0) { 1136 * If this was the last entry in the table, we can trim the table size
946 fbp = NULL; 1137 * back. There might be other entries at the end referring to
947 logfree = 0; 1138 * non-existent data blocks, get those too.
948 } else if (error != ENOSPC || args->total != 0) 1139 */
949 return error; 1140 if (findex == freehdr.nvalid - 1) {
950 /* 1141 int i; /* free entry index */
951 * It's possible to get ENOSPC if there is no 1142
952 * space reservation. In this case some one 1143 for (i = findex - 1; i >= 0; i--) {
953 * else will eventually get rid of this block. 1144 if (bests[i] != cpu_to_be16(NULLDATAOFF))
954 */ 1145 break;
955 } 1146 }
1147 freehdr.nvalid = i + 1;
1148 logfree = 0;
956 } else { 1149 } else {
1150 /* Not the last entry, just punch it out. */
1151 bests[findex] = cpu_to_be16(NULLDATAOFF);
1152 logfree = 1;
1153 }
1154
1155 xfs_dir3_free_hdr_to_disk(free, &freehdr);
1156 xfs_dir2_free_log_header(tp, fbp);
1157
1158 /*
1159 * If there are no useful entries left in the block, get rid of the
1160 * block if we can.
1161 */
1162 if (!freehdr.nused) {
1163 int error;
1164
1165 error = xfs_dir2_shrink_inode(args, fdb, fbp);
1166 if (error == 0) {
1167 fbp = NULL;
1168 logfree = 0;
1169 } else if (error != ENOSPC || args->total != 0)
1170 return error;
957 /* 1171 /*
958 * Data block is not empty, just set the free entry to the new 1172 * It's possible to get ENOSPC if there is no
959 * value. 1173 * space reservation. In this case some one
1174 * else will eventually get rid of this block.
960 */ 1175 */
961 free->bests[findex] = cpu_to_be16(longest);
962 logfree = 1;
963 } 1176 }
964 1177
965 /* Log the free entry that changed, unless we got rid of it. */ 1178 /* Log the free entry that changed, unless we got rid of it. */
@@ -994,6 +1207,9 @@ xfs_dir2_leafn_remove(
994 int needlog; /* need to log data header */ 1207 int needlog; /* need to log data header */
995 int needscan; /* need to rescan data frees */ 1208 int needscan; /* need to rescan data frees */
996 xfs_trans_t *tp; /* transaction pointer */ 1209 xfs_trans_t *tp; /* transaction pointer */
1210 struct xfs_dir2_data_free *bf; /* bestfree table */
1211 struct xfs_dir3_icleaf_hdr leafhdr;
1212 struct xfs_dir2_leaf_entry *ents;
997 1213
998 trace_xfs_dir2_leafn_remove(args, index); 1214 trace_xfs_dir2_leafn_remove(args, index);
999 1215
@@ -1001,11 +1217,14 @@ xfs_dir2_leafn_remove(
1001 tp = args->trans; 1217 tp = args->trans;
1002 mp = dp->i_mount; 1218 mp = dp->i_mount;
1003 leaf = bp->b_addr; 1219 leaf = bp->b_addr;
1004 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1220 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1221 ents = xfs_dir3_leaf_ents_p(leaf);
1222
1005 /* 1223 /*
1006 * Point to the entry we're removing. 1224 * Point to the entry we're removing.
1007 */ 1225 */
1008 lep = &leaf->ents[index]; 1226 lep = &ents[index];
1227
1009 /* 1228 /*
1010 * Extract the data block and offset from the entry. 1229 * Extract the data block and offset from the entry.
1011 */ 1230 */
@@ -1013,14 +1232,18 @@ xfs_dir2_leafn_remove(
1013 ASSERT(dblk->blkno == db); 1232 ASSERT(dblk->blkno == db);
1014 off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)); 1233 off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
1015 ASSERT(dblk->index == off); 1234 ASSERT(dblk->index == off);
1235
1016 /* 1236 /*
1017 * Kill the leaf entry by marking it stale. 1237 * Kill the leaf entry by marking it stale.
1018 * Log the leaf block changes. 1238 * Log the leaf block changes.
1019 */ 1239 */
1020 be16_add_cpu(&leaf->hdr.stale, 1); 1240 leafhdr.stale++;
1021 xfs_dir2_leaf_log_header(tp, bp); 1241 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
1242 xfs_dir3_leaf_log_header(tp, bp);
1243
1022 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); 1244 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1023 xfs_dir2_leaf_log_ents(tp, bp, index, index); 1245 xfs_dir3_leaf_log_ents(tp, bp, index, index);
1246
1024 /* 1247 /*
1025 * Make the data entry free. Keep track of the longest freespace 1248 * Make the data entry free. Keep track of the longest freespace
1026 * in the data block in case it changes. 1249 * in the data block in case it changes.
@@ -1028,7 +1251,8 @@ xfs_dir2_leafn_remove(
1028 dbp = dblk->bp; 1251 dbp = dblk->bp;
1029 hdr = dbp->b_addr; 1252 hdr = dbp->b_addr;
1030 dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); 1253 dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
1031 longest = be16_to_cpu(hdr->bestfree[0].length); 1254 bf = xfs_dir3_data_bestfree_p(hdr);
1255 longest = be16_to_cpu(bf[0].length);
1032 needlog = needscan = 0; 1256 needlog = needscan = 0;
1033 xfs_dir2_data_make_free(tp, dbp, off, 1257 xfs_dir2_data_make_free(tp, dbp, off,
1034 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 1258 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -1040,12 +1264,12 @@ xfs_dir2_leafn_remove(
1040 xfs_dir2_data_freescan(mp, hdr, &needlog); 1264 xfs_dir2_data_freescan(mp, hdr, &needlog);
1041 if (needlog) 1265 if (needlog)
1042 xfs_dir2_data_log_header(tp, dbp); 1266 xfs_dir2_data_log_header(tp, dbp);
1043 xfs_dir2_data_check(dp, dbp); 1267 xfs_dir3_data_check(dp, dbp);
1044 /* 1268 /*
1045 * If the longest data block freespace changes, need to update 1269 * If the longest data block freespace changes, need to update
1046 * the corresponding freeblock entry. 1270 * the corresponding freeblock entry.
1047 */ 1271 */
1048 if (longest < be16_to_cpu(hdr->bestfree[0].length)) { 1272 if (longest < be16_to_cpu(bf[0].length)) {
1049 int error; /* error return value */ 1273 int error; /* error return value */
1050 struct xfs_buf *fbp; /* freeblock buffer */ 1274 struct xfs_buf *fbp; /* freeblock buffer */
1051 xfs_dir2_db_t fdb; /* freeblock block number */ 1275 xfs_dir2_db_t fdb; /* freeblock block number */
@@ -1062,20 +1286,25 @@ xfs_dir2_leafn_remove(
1062 if (error) 1286 if (error)
1063 return error; 1287 return error;
1064 free = fbp->b_addr; 1288 free = fbp->b_addr;
1065 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 1289#ifdef DEBUG
1066 ASSERT(be32_to_cpu(free->hdr.firstdb) == 1290 {
1067 xfs_dir2_free_max_bests(mp) * 1291 struct xfs_dir3_icfree_hdr freehdr;
1068 (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); 1292 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1293 ASSERT(freehdr.firstdb == xfs_dir3_free_max_bests(mp) *
1294 (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
1295 }
1296#endif
1069 /* 1297 /*
1070 * Calculate which entry we need to fix. 1298 * Calculate which entry we need to fix.
1071 */ 1299 */
1072 findex = xfs_dir2_db_to_fdindex(mp, db); 1300 findex = xfs_dir2_db_to_fdindex(mp, db);
1073 longest = be16_to_cpu(hdr->bestfree[0].length); 1301 longest = be16_to_cpu(bf[0].length);
1074 /* 1302 /*
1075 * If the data block is now empty we can get rid of it 1303 * If the data block is now empty we can get rid of it
1076 * (usually). 1304 * (usually).
1077 */ 1305 */
1078 if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) { 1306 if (longest == mp->m_dirblksize -
1307 xfs_dir3_data_entry_offset(hdr)) {
1079 /* 1308 /*
1080 * Try to punch out the data block. 1309 * Try to punch out the data block.
1081 */ 1310 */
@@ -1096,21 +1325,19 @@ xfs_dir2_leafn_remove(
1096 * If we got rid of the data block, we can eliminate that entry 1325 * If we got rid of the data block, we can eliminate that entry
1097 * in the free block. 1326 * in the free block.
1098 */ 1327 */
1099 error = xfs_dir2_data_block_free(args, hdr, free, 1328 error = xfs_dir3_data_block_free(args, hdr, free,
1100 fdb, findex, fbp, longest); 1329 fdb, findex, fbp, longest);
1101 if (error) 1330 if (error)
1102 return error; 1331 return error;
1103 } 1332 }
1104 1333
1105 xfs_dir2_leafn_check(dp, bp); 1334 xfs_dir3_leaf_check(mp, bp);
1106 /* 1335 /*
1107 * Return indication of whether this leaf block is empty enough 1336 * Return indication of whether this leaf block is empty enough
1108 * to justify trying to join it with a neighbor. 1337 * to justify trying to join it with a neighbor.
1109 */ 1338 */
1110 *rval = 1339 *rval = (xfs_dir3_leaf_hdr_size(leaf) +
1111 ((uint)sizeof(leaf->hdr) + 1340 (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
1112 (uint)sizeof(leaf->ents[0]) *
1113 (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale))) <
1114 mp->m_dir_magicpct; 1341 mp->m_dir_magicpct;
1115 return 0; 1342 return 0;
1116} 1343}
@@ -1143,11 +1370,11 @@ xfs_dir2_leafn_split(
1143 /* 1370 /*
1144 * Initialize the new leaf block. 1371 * Initialize the new leaf block.
1145 */ 1372 */
1146 error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno), 1373 error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno),
1147 &newblk->bp, XFS_DIR2_LEAFN_MAGIC); 1374 &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
1148 if (error) { 1375 if (error)
1149 return error; 1376 return error;
1150 } 1377
1151 newblk->blkno = blkno; 1378 newblk->blkno = blkno;
1152 newblk->magic = XFS_DIR2_LEAFN_MAGIC; 1379 newblk->magic = XFS_DIR2_LEAFN_MAGIC;
1153 /* 1380 /*
@@ -1155,7 +1382,7 @@ xfs_dir2_leafn_split(
1155 * block into the leaves. 1382 * block into the leaves.
1156 */ 1383 */
1157 xfs_dir2_leafn_rebalance(state, oldblk, newblk); 1384 xfs_dir2_leafn_rebalance(state, oldblk, newblk);
1158 error = xfs_da_blk_link(state, oldblk, newblk); 1385 error = xfs_da3_blk_link(state, oldblk, newblk);
1159 if (error) { 1386 if (error) {
1160 return error; 1387 return error;
1161 } 1388 }
@@ -1171,8 +1398,8 @@ xfs_dir2_leafn_split(
1171 */ 1398 */
1172 oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL); 1399 oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL);
1173 newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL); 1400 newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL);
1174 xfs_dir2_leafn_check(args->dp, oldblk->bp); 1401 xfs_dir3_leaf_check(mp, oldblk->bp);
1175 xfs_dir2_leafn_check(args->dp, newblk->bp); 1402 xfs_dir3_leaf_check(mp, newblk->bp);
1176 return error; 1403 return error;
1177} 1404}
1178 1405
@@ -1198,9 +1425,10 @@ xfs_dir2_leafn_toosmall(
1198 int error; /* error return value */ 1425 int error; /* error return value */
1199 int forward; /* sibling block direction */ 1426 int forward; /* sibling block direction */
1200 int i; /* sibling counter */ 1427 int i; /* sibling counter */
1201 xfs_da_blkinfo_t *info; /* leaf block header */
1202 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1428 xfs_dir2_leaf_t *leaf; /* leaf structure */
1203 int rval; /* result from path_shift */ 1429 int rval; /* result from path_shift */
1430 struct xfs_dir3_icleaf_hdr leafhdr;
1431 struct xfs_dir2_leaf_entry *ents;
1204 1432
1205 /* 1433 /*
1206 * Check for the degenerate case of the block being over 50% full. 1434 * Check for the degenerate case of the block being over 50% full.
@@ -1208,11 +1436,13 @@ xfs_dir2_leafn_toosmall(
1208 * to coalesce with a sibling. 1436 * to coalesce with a sibling.
1209 */ 1437 */
1210 blk = &state->path.blk[state->path.active - 1]; 1438 blk = &state->path.blk[state->path.active - 1];
1211 info = blk->bp->b_addr; 1439 leaf = blk->bp->b_addr;
1212 ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1440 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1213 leaf = (xfs_dir2_leaf_t *)info; 1441 ents = xfs_dir3_leaf_ents_p(leaf);
1214 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1442 xfs_dir3_leaf_check(state->args->dp->i_mount, blk->bp);
1215 bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); 1443
1444 count = leafhdr.count - leafhdr.stale;
1445 bytes = xfs_dir3_leaf_hdr_size(leaf) + count * sizeof(ents[0]);
1216 if (bytes > (state->blocksize >> 1)) { 1446 if (bytes > (state->blocksize >> 1)) {
1217 /* 1447 /*
1218 * Blk over 50%, don't try to join. 1448 * Blk over 50%, don't try to join.
@@ -1231,9 +1461,9 @@ xfs_dir2_leafn_toosmall(
1231 * Make altpath point to the block we want to keep and 1461 * Make altpath point to the block we want to keep and
1232 * path point to the block we want to drop (this one). 1462 * path point to the block we want to drop (this one).
1233 */ 1463 */
1234 forward = (info->forw != 0); 1464 forward = (leafhdr.forw != 0);
1235 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1465 memcpy(&state->altpath, &state->path, sizeof(state->path));
1236 error = xfs_da_path_shift(state, &state->altpath, forward, 0, 1466 error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
1237 &rval); 1467 &rval);
1238 if (error) 1468 if (error)
1239 return error; 1469 return error;
@@ -1247,15 +1477,17 @@ xfs_dir2_leafn_toosmall(
1247 * We prefer coalescing with the lower numbered sibling so as 1477 * We prefer coalescing with the lower numbered sibling so as
1248 * to shrink a directory over time. 1478 * to shrink a directory over time.
1249 */ 1479 */
1250 forward = be32_to_cpu(info->forw) < be32_to_cpu(info->back); 1480 forward = leafhdr.forw < leafhdr.back;
1251 for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { 1481 for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
1252 blkno = forward ? be32_to_cpu(info->forw) : be32_to_cpu(info->back); 1482 struct xfs_dir3_icleaf_hdr hdr2;
1483
1484 blkno = forward ? leafhdr.forw : leafhdr.back;
1253 if (blkno == 0) 1485 if (blkno == 0)
1254 continue; 1486 continue;
1255 /* 1487 /*
1256 * Read the sibling leaf block. 1488 * Read the sibling leaf block.
1257 */ 1489 */
1258 error = xfs_dir2_leafn_read(state->args->trans, state->args->dp, 1490 error = xfs_dir3_leafn_read(state->args->trans, state->args->dp,
1259 blkno, -1, &bp); 1491 blkno, -1, &bp);
1260 if (error) 1492 if (error)
1261 return error; 1493 return error;
@@ -1263,13 +1495,15 @@ xfs_dir2_leafn_toosmall(
1263 /* 1495 /*
1264 * Count bytes in the two blocks combined. 1496 * Count bytes in the two blocks combined.
1265 */ 1497 */
1266 leaf = (xfs_dir2_leaf_t *)info; 1498 count = leafhdr.count - leafhdr.stale;
1267 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1268 bytes = state->blocksize - (state->blocksize >> 2); 1499 bytes = state->blocksize - (state->blocksize >> 2);
1500
1269 leaf = bp->b_addr; 1501 leaf = bp->b_addr;
1270 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1502 xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf);
1271 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1503 ents = xfs_dir3_leaf_ents_p(leaf);
1272 bytes -= count * (uint)sizeof(leaf->ents[0]); 1504 count += hdr2.count - hdr2.stale;
1505 bytes -= count * sizeof(ents[0]);
1506
1273 /* 1507 /*
1274 * Fits with at least 25% to spare. 1508 * Fits with at least 25% to spare.
1275 */ 1509 */
@@ -1291,10 +1525,10 @@ xfs_dir2_leafn_toosmall(
1291 */ 1525 */
1292 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1526 memcpy(&state->altpath, &state->path, sizeof(state->path));
1293 if (blkno < blk->blkno) 1527 if (blkno < blk->blkno)
1294 error = xfs_da_path_shift(state, &state->altpath, forward, 0, 1528 error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
1295 &rval); 1529 &rval);
1296 else 1530 else
1297 error = xfs_da_path_shift(state, &state->path, forward, 0, 1531 error = xfs_da3_path_shift(state, &state->path, forward, 0,
1298 &rval); 1532 &rval);
1299 if (error) { 1533 if (error) {
1300 return error; 1534 return error;
@@ -1316,34 +1550,53 @@ xfs_dir2_leafn_unbalance(
1316 xfs_da_args_t *args; /* operation arguments */ 1550 xfs_da_args_t *args; /* operation arguments */
1317 xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */ 1551 xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */
1318 xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */ 1552 xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */
1553 struct xfs_dir3_icleaf_hdr savehdr;
1554 struct xfs_dir3_icleaf_hdr drophdr;
1555 struct xfs_dir2_leaf_entry *sents;
1556 struct xfs_dir2_leaf_entry *dents;
1319 1557
1320 args = state->args; 1558 args = state->args;
1321 ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); 1559 ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1322 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); 1560 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1323 drop_leaf = drop_blk->bp->b_addr; 1561 drop_leaf = drop_blk->bp->b_addr;
1324 save_leaf = save_blk->bp->b_addr; 1562 save_leaf = save_blk->bp->b_addr;
1325 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1563
1326 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1564 xfs_dir3_leaf_hdr_from_disk(&savehdr, save_leaf);
1565 xfs_dir3_leaf_hdr_from_disk(&drophdr, drop_leaf);
1566 sents = xfs_dir3_leaf_ents_p(save_leaf);
1567 dents = xfs_dir3_leaf_ents_p(drop_leaf);
1568
1327 /* 1569 /*
1328 * If there are any stale leaf entries, take this opportunity 1570 * If there are any stale leaf entries, take this opportunity
1329 * to purge them. 1571 * to purge them.
1330 */ 1572 */
1331 if (drop_leaf->hdr.stale) 1573 if (drophdr.stale)
1332 xfs_dir2_leaf_compact(args, drop_blk->bp); 1574 xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp);
1333 if (save_leaf->hdr.stale) 1575 if (savehdr.stale)
1334 xfs_dir2_leaf_compact(args, save_blk->bp); 1576 xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp);
1577
1335 /* 1578 /*
1336 * Move the entries from drop to the appropriate end of save. 1579 * Move the entries from drop to the appropriate end of save.
1337 */ 1580 */
1338 drop_blk->hashval = be32_to_cpu(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval); 1581 drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval);
1339 if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) 1582 if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
1340 xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0, 1583 xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
1341 be16_to_cpu(drop_leaf->hdr.count)); 1584 save_blk->bp, &savehdr, sents, 0,
1585 drophdr.count);
1342 else 1586 else
1343 xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 1587 xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
1344 be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count)); 1588 save_blk->bp, &savehdr, sents,
1345 save_blk->hashval = be32_to_cpu(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval); 1589 savehdr.count, drophdr.count);
1346 xfs_dir2_leafn_check(args->dp, save_blk->bp); 1590 save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);
1591
1592 /* log the changes made when moving the entries */
1593 xfs_dir3_leaf_hdr_to_disk(save_leaf, &savehdr);
1594 xfs_dir3_leaf_hdr_to_disk(drop_leaf, &drophdr);
1595 xfs_dir3_leaf_log_header(args->trans, save_blk->bp);
1596 xfs_dir3_leaf_log_header(args->trans, drop_blk->bp);
1597
1598 xfs_dir3_leaf_check(args->dp->i_mount, save_blk->bp);
1599 xfs_dir3_leaf_check(args->dp->i_mount, drop_blk->bp);
1347} 1600}
1348 1601
1349/* 1602/*
@@ -1372,7 +1625,7 @@ xfs_dir2_node_addname(
1372 * Look up the name. We're not supposed to find it, but 1625 * Look up the name. We're not supposed to find it, but
1373 * this gives us the insertion point. 1626 * this gives us the insertion point.
1374 */ 1627 */
1375 error = xfs_da_node_lookup_int(state, &rval); 1628 error = xfs_da3_node_lookup_int(state, &rval);
1376 if (error) 1629 if (error)
1377 rval = error; 1630 rval = error;
1378 if (rval != ENOENT) { 1631 if (rval != ENOENT) {
@@ -1398,7 +1651,7 @@ xfs_dir2_node_addname(
1398 * It worked, fix the hash values up the btree. 1651 * It worked, fix the hash values up the btree.
1399 */ 1652 */
1400 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) 1653 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
1401 xfs_da_fixhashpath(state, &state->path); 1654 xfs_da3_fixhashpath(state, &state->path);
1402 } else { 1655 } else {
1403 /* 1656 /*
1404 * It didn't work, we need to split the leaf block. 1657 * It didn't work, we need to split the leaf block.
@@ -1410,7 +1663,7 @@ xfs_dir2_node_addname(
1410 /* 1663 /*
1411 * Split the leaf block and insert the new entry. 1664 * Split the leaf block and insert the new entry.
1412 */ 1665 */
1413 rval = xfs_da_split(state); 1666 rval = xfs_da3_split(state);
1414 } 1667 }
1415done: 1668done:
1416 xfs_da_state_free(state); 1669 xfs_da_state_free(state);
@@ -1447,6 +1700,9 @@ xfs_dir2_node_addname_int(
1447 int needscan; /* need to rescan data frees */ 1700 int needscan; /* need to rescan data frees */
1448 __be16 *tagp; /* data entry tag pointer */ 1701 __be16 *tagp; /* data entry tag pointer */
1449 xfs_trans_t *tp; /* transaction pointer */ 1702 xfs_trans_t *tp; /* transaction pointer */
1703 __be16 *bests;
1704 struct xfs_dir3_icfree_hdr freehdr;
1705 struct xfs_dir2_data_free *bf;
1450 1706
1451 dp = args->dp; 1707 dp = args->dp;
1452 mp = dp->i_mount; 1708 mp = dp->i_mount;
@@ -1464,36 +1720,37 @@ xfs_dir2_node_addname_int(
1464 */ 1720 */
1465 ifbno = fblk->blkno; 1721 ifbno = fblk->blkno;
1466 free = fbp->b_addr; 1722 free = fbp->b_addr;
1467 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1468 findex = fblk->index; 1723 findex = fblk->index;
1724 bests = xfs_dir3_free_bests_p(mp, free);
1725 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1726
1469 /* 1727 /*
1470 * This means the free entry showed that the data block had 1728 * This means the free entry showed that the data block had
1471 * space for our entry, so we remembered it. 1729 * space for our entry, so we remembered it.
1472 * Use that data block. 1730 * Use that data block.
1473 */ 1731 */
1474 if (findex >= 0) { 1732 if (findex >= 0) {
1475 ASSERT(findex < be32_to_cpu(free->hdr.nvalid)); 1733 ASSERT(findex < freehdr.nvalid);
1476 ASSERT(be16_to_cpu(free->bests[findex]) != NULLDATAOFF); 1734 ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
1477 ASSERT(be16_to_cpu(free->bests[findex]) >= length); 1735 ASSERT(be16_to_cpu(bests[findex]) >= length);
1478 dbno = be32_to_cpu(free->hdr.firstdb) + findex; 1736 dbno = freehdr.firstdb + findex;
1479 } 1737 } else {
1480 /* 1738 /*
1481 * The data block looked at didn't have enough room. 1739 * The data block looked at didn't have enough room.
1482 * We'll start at the beginning of the freespace entries. 1740 * We'll start at the beginning of the freespace entries.
1483 */ 1741 */
1484 else {
1485 dbno = -1; 1742 dbno = -1;
1486 findex = 0; 1743 findex = 0;
1487 } 1744 }
1488 } 1745 } else {
1489 /* 1746 /*
1490 * Didn't come in with a freespace block, so don't have a data block. 1747 * Didn't come in with a freespace block, so no data block.
1491 */ 1748 */
1492 else {
1493 ifbno = dbno = -1; 1749 ifbno = dbno = -1;
1494 fbp = NULL; 1750 fbp = NULL;
1495 findex = 0; 1751 findex = 0;
1496 } 1752 }
1753
1497 /* 1754 /*
1498 * If we don't have a data block yet, we're going to scan the 1755 * If we don't have a data block yet, we're going to scan the
1499 * freespace blocks looking for one. Figure out what the 1756 * freespace blocks looking for one. Figure out what the
@@ -1547,20 +1804,26 @@ xfs_dir2_node_addname_int(
1547 if (!fbp) 1804 if (!fbp)
1548 continue; 1805 continue;
1549 free = fbp->b_addr; 1806 free = fbp->b_addr;
1550 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1551 findex = 0; 1807 findex = 0;
1552 } 1808 }
1553 /* 1809 /*
1554 * Look at the current free entry. Is it good enough? 1810 * Look at the current free entry. Is it good enough?
1811 *
1812 * The bests initialisation should be where the bufer is read in
1813 * the above branch. But gcc is too stupid to realise that bests
1814 * and the freehdr are actually initialised if they are placed
1815 * there, so we have to do it here to avoid warnings. Blech.
1555 */ 1816 */
1556 if (be16_to_cpu(free->bests[findex]) != NULLDATAOFF && 1817 bests = xfs_dir3_free_bests_p(mp, free);
1557 be16_to_cpu(free->bests[findex]) >= length) 1818 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1558 dbno = be32_to_cpu(free->hdr.firstdb) + findex; 1819 if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
1820 be16_to_cpu(bests[findex]) >= length)
1821 dbno = freehdr.firstdb + findex;
1559 else { 1822 else {
1560 /* 1823 /*
1561 * Are we done with the freeblock? 1824 * Are we done with the freeblock?
1562 */ 1825 */
1563 if (++findex == be32_to_cpu(free->hdr.nvalid)) { 1826 if (++findex == freehdr.nvalid) {
1564 /* 1827 /*
1565 * Drop the block. 1828 * Drop the block.
1566 */ 1829 */
@@ -1588,7 +1851,7 @@ xfs_dir2_node_addname_int(
1588 if (unlikely((error = xfs_dir2_grow_inode(args, 1851 if (unlikely((error = xfs_dir2_grow_inode(args,
1589 XFS_DIR2_DATA_SPACE, 1852 XFS_DIR2_DATA_SPACE,
1590 &dbno)) || 1853 &dbno)) ||
1591 (error = xfs_dir2_data_init(args, dbno, &dbp)))) 1854 (error = xfs_dir3_data_init(args, dbno, &dbp))))
1592 return error; 1855 return error;
1593 1856
1594 /* 1857 /*
@@ -1614,11 +1877,11 @@ xfs_dir2_node_addname_int(
1614 * If there wasn't a freespace block, the read will 1877 * If there wasn't a freespace block, the read will
1615 * return a NULL fbp. Allocate and initialize a new one. 1878 * return a NULL fbp. Allocate and initialize a new one.
1616 */ 1879 */
1617 if( fbp == NULL ) { 1880 if (!fbp) {
1618 if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, 1881 error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
1619 &fbno))) { 1882 &fbno);
1883 if (error)
1620 return error; 1884 return error;
1621 }
1622 1885
1623 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { 1886 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
1624 xfs_alert(mp, 1887 xfs_alert(mp,
@@ -1646,27 +1909,24 @@ xfs_dir2_node_addname_int(
1646 /* 1909 /*
1647 * Get a buffer for the new block. 1910 * Get a buffer for the new block.
1648 */ 1911 */
1649 error = xfs_da_get_buf(tp, dp, 1912 error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp);
1650 xfs_dir2_db_to_da(mp, fbno),
1651 -1, &fbp, XFS_DATA_FORK);
1652 if (error) 1913 if (error)
1653 return error; 1914 return error;
1654 fbp->b_ops = &xfs_dir2_free_buf_ops; 1915 free = fbp->b_addr;
1916 bests = xfs_dir3_free_bests_p(mp, free);
1917 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1655 1918
1656 /* 1919 /*
1657 * Initialize the new block to be empty, and remember 1920 * Remember the first slot as our empty slot.
1658 * its first slot as our empty slot.
1659 */ 1921 */
1660 free = fbp->b_addr; 1922 freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1661 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); 1923 xfs_dir3_free_max_bests(mp);
1662 free->hdr.firstdb = cpu_to_be32(
1663 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1664 xfs_dir2_free_max_bests(mp));
1665 free->hdr.nvalid = 0; 1924 free->hdr.nvalid = 0;
1666 free->hdr.nused = 0; 1925 free->hdr.nused = 0;
1667 } else { 1926 } else {
1668 free = fbp->b_addr; 1927 free = fbp->b_addr;
1669 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 1928 bests = xfs_dir3_free_bests_p(mp, free);
1929 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1670 } 1930 }
1671 1931
1672 /* 1932 /*
@@ -1677,20 +1937,21 @@ xfs_dir2_node_addname_int(
1677 * If it's after the end of the current entries in the 1937 * If it's after the end of the current entries in the
1678 * freespace block, extend that table. 1938 * freespace block, extend that table.
1679 */ 1939 */
1680 if (findex >= be32_to_cpu(free->hdr.nvalid)) { 1940 if (findex >= freehdr.nvalid) {
1681 ASSERT(findex < xfs_dir2_free_max_bests(mp)); 1941 ASSERT(findex < xfs_dir3_free_max_bests(mp));
1682 free->hdr.nvalid = cpu_to_be32(findex + 1); 1942 freehdr.nvalid = findex + 1;
1683 /* 1943 /*
1684 * Tag new entry so nused will go up. 1944 * Tag new entry so nused will go up.
1685 */ 1945 */
1686 free->bests[findex] = cpu_to_be16(NULLDATAOFF); 1946 bests[findex] = cpu_to_be16(NULLDATAOFF);
1687 } 1947 }
1688 /* 1948 /*
1689 * If this entry was for an empty data block 1949 * If this entry was for an empty data block
1690 * (this should always be true) then update the header. 1950 * (this should always be true) then update the header.
1691 */ 1951 */
1692 if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) { 1952 if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
1693 be32_add_cpu(&free->hdr.nused, 1); 1953 freehdr.nused++;
1954 xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr);
1694 xfs_dir2_free_log_header(tp, fbp); 1955 xfs_dir2_free_log_header(tp, fbp);
1695 } 1956 }
1696 /* 1957 /*
@@ -1699,7 +1960,8 @@ xfs_dir2_node_addname_int(
1699 * change again. 1960 * change again.
1700 */ 1961 */
1701 hdr = dbp->b_addr; 1962 hdr = dbp->b_addr;
1702 free->bests[findex] = hdr->bestfree[0].length; 1963 bf = xfs_dir3_data_bestfree_p(hdr);
1964 bests[findex] = bf[0].length;
1703 logfree = 1; 1965 logfree = 1;
1704 } 1966 }
1705 /* 1967 /*
@@ -1715,19 +1977,20 @@ xfs_dir2_node_addname_int(
1715 /* 1977 /*
1716 * Read the data block in. 1978 * Read the data block in.
1717 */ 1979 */
1718 error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), 1980 error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno),
1719 -1, &dbp); 1981 -1, &dbp);
1720 if (error) 1982 if (error)
1721 return error; 1983 return error;
1722 hdr = dbp->b_addr; 1984 hdr = dbp->b_addr;
1985 bf = xfs_dir3_data_bestfree_p(hdr);
1723 logfree = 0; 1986 logfree = 0;
1724 } 1987 }
1725 ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); 1988 ASSERT(be16_to_cpu(bf[0].length) >= length);
1726 /* 1989 /*
1727 * Point to the existing unused space. 1990 * Point to the existing unused space.
1728 */ 1991 */
1729 dup = (xfs_dir2_data_unused_t *) 1992 dup = (xfs_dir2_data_unused_t *)
1730 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); 1993 ((char *)hdr + be16_to_cpu(bf[0].offset));
1731 needscan = needlog = 0; 1994 needscan = needlog = 0;
1732 /* 1995 /*
1733 * Mark the first part of the unused space, inuse for us. 1996 * Mark the first part of the unused space, inuse for us.
@@ -1758,8 +2021,9 @@ xfs_dir2_node_addname_int(
1758 /* 2021 /*
1759 * If the freespace entry is now wrong, update it. 2022 * If the freespace entry is now wrong, update it.
1760 */ 2023 */
1761 if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) { 2024 bests = xfs_dir3_free_bests_p(mp, free); /* gcc is so stupid */
1762 free->bests[findex] = hdr->bestfree[0].length; 2025 if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
2026 bests[findex] = bf[0].length;
1763 logfree = 1; 2027 logfree = 1;
1764 } 2028 }
1765 /* 2029 /*
@@ -1777,7 +2041,7 @@ xfs_dir2_node_addname_int(
1777 2041
1778/* 2042/*
1779 * Lookup an entry in a node-format directory. 2043 * Lookup an entry in a node-format directory.
1780 * All the real work happens in xfs_da_node_lookup_int. 2044 * All the real work happens in xfs_da3_node_lookup_int.
1781 * The only real output is the inode number of the entry. 2045 * The only real output is the inode number of the entry.
1782 */ 2046 */
1783int /* error */ 2047int /* error */
@@ -1802,7 +2066,7 @@ xfs_dir2_node_lookup(
1802 /* 2066 /*
1803 * Fill in the path to the entry in the cursor. 2067 * Fill in the path to the entry in the cursor.
1804 */ 2068 */
1805 error = xfs_da_node_lookup_int(state, &rval); 2069 error = xfs_da3_node_lookup_int(state, &rval);
1806 if (error) 2070 if (error)
1807 rval = error; 2071 rval = error;
1808 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { 2072 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
@@ -1857,7 +2121,7 @@ xfs_dir2_node_removename(
1857 /* 2121 /*
1858 * Look up the entry we're deleting, set up the cursor. 2122 * Look up the entry we're deleting, set up the cursor.
1859 */ 2123 */
1860 error = xfs_da_node_lookup_int(state, &rval); 2124 error = xfs_da3_node_lookup_int(state, &rval);
1861 if (error) 2125 if (error)
1862 rval = error; 2126 rval = error;
1863 /* 2127 /*
@@ -1881,12 +2145,12 @@ xfs_dir2_node_removename(
1881 /* 2145 /*
1882 * Fix the hash values up the btree. 2146 * Fix the hash values up the btree.
1883 */ 2147 */
1884 xfs_da_fixhashpath(state, &state->path); 2148 xfs_da3_fixhashpath(state, &state->path);
1885 /* 2149 /*
1886 * If we need to join leaf blocks, do it. 2150 * If we need to join leaf blocks, do it.
1887 */ 2151 */
1888 if (rval && state->path.active > 1) 2152 if (rval && state->path.active > 1)
1889 error = xfs_da_join(state); 2153 error = xfs_da3_join(state);
1890 /* 2154 /*
1891 * If no errors so far, try conversion to leaf format. 2155 * If no errors so far, try conversion to leaf format.
1892 */ 2156 */
@@ -1928,7 +2192,7 @@ xfs_dir2_node_replace(
1928 /* 2192 /*
1929 * Lookup the entry to change in the btree. 2193 * Lookup the entry to change in the btree.
1930 */ 2194 */
1931 error = xfs_da_node_lookup_int(state, &rval); 2195 error = xfs_da3_node_lookup_int(state, &rval);
1932 if (error) { 2196 if (error) {
1933 rval = error; 2197 rval = error;
1934 } 2198 }
@@ -1937,19 +2201,22 @@ xfs_dir2_node_replace(
1937 * and locked it. But paranoia is good. 2201 * and locked it. But paranoia is good.
1938 */ 2202 */
1939 if (rval == EEXIST) { 2203 if (rval == EEXIST) {
2204 struct xfs_dir2_leaf_entry *ents;
1940 /* 2205 /*
1941 * Find the leaf entry. 2206 * Find the leaf entry.
1942 */ 2207 */
1943 blk = &state->path.blk[state->path.active - 1]; 2208 blk = &state->path.blk[state->path.active - 1];
1944 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); 2209 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
1945 leaf = blk->bp->b_addr; 2210 leaf = blk->bp->b_addr;
1946 lep = &leaf->ents[blk->index]; 2211 ents = xfs_dir3_leaf_ents_p(leaf);
2212 lep = &ents[blk->index];
1947 ASSERT(state->extravalid); 2213 ASSERT(state->extravalid);
1948 /* 2214 /*
1949 * Point to the data entry. 2215 * Point to the data entry.
1950 */ 2216 */
1951 hdr = state->extrablk.bp->b_addr; 2217 hdr = state->extrablk.bp->b_addr;
1952 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); 2218 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
2219 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1953 dep = (xfs_dir2_data_entry_t *) 2220 dep = (xfs_dir2_data_entry_t *)
1954 ((char *)hdr + 2221 ((char *)hdr +
1955 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); 2222 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
@@ -1995,6 +2262,7 @@ xfs_dir2_node_trim_free(
1995 xfs_dir2_free_t *free; /* freespace structure */ 2262 xfs_dir2_free_t *free; /* freespace structure */
1996 xfs_mount_t *mp; /* filesystem mount point */ 2263 xfs_mount_t *mp; /* filesystem mount point */
1997 xfs_trans_t *tp; /* transaction pointer */ 2264 xfs_trans_t *tp; /* transaction pointer */
2265 struct xfs_dir3_icfree_hdr freehdr;
1998 2266
1999 dp = args->dp; 2267 dp = args->dp;
2000 mp = dp->i_mount; 2268 mp = dp->i_mount;
@@ -2012,11 +2280,12 @@ xfs_dir2_node_trim_free(
2012 if (!bp) 2280 if (!bp)
2013 return 0; 2281 return 0;
2014 free = bp->b_addr; 2282 free = bp->b_addr;
2015 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 2283 xfs_dir3_free_hdr_from_disk(&freehdr, free);
2284
2016 /* 2285 /*
2017 * If there are used entries, there's nothing to do. 2286 * If there are used entries, there's nothing to do.
2018 */ 2287 */
2019 if (be32_to_cpu(free->hdr.nused) > 0) { 2288 if (freehdr.nused > 0) {
2020 xfs_trans_brelse(tp, bp); 2289 xfs_trans_brelse(tp, bp);
2021 *rvalp = 0; 2290 *rvalp = 0;
2022 return 0; 2291 return 0;
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 7da79f6515fd..7cf573c88aad 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -30,7 +30,7 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
30 const unsigned char *name, int len); 30 const unsigned char *name, int len);
31 31
32/* xfs_dir2_block.c */ 32/* xfs_dir2_block.c */
33extern const struct xfs_buf_ops xfs_dir2_block_buf_ops; 33extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
34 34
35extern int xfs_dir2_block_addname(struct xfs_da_args *args); 35extern int xfs_dir2_block_addname(struct xfs_da_args *args);
36extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, 36extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
@@ -43,17 +43,18 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
43 43
44/* xfs_dir2_data.c */ 44/* xfs_dir2_data.c */
45#ifdef DEBUG 45#ifdef DEBUG
46#define xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp); 46#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
47#else 47#else
48#define xfs_dir2_data_check(dp,bp) 48#define xfs_dir3_data_check(dp,bp)
49#endif 49#endif
50 50
51extern const struct xfs_buf_ops xfs_dir2_data_buf_ops; 51extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
52extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
52 53
53extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 54extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
54extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp, 55extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
55 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); 56 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
56extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, 57extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
57 xfs_dablk_t bno, xfs_daddr_t mapped_bno); 58 xfs_dablk_t bno, xfs_daddr_t mapped_bno);
58 59
59extern struct xfs_dir2_data_free * 60extern struct xfs_dir2_data_free *
@@ -61,7 +62,7 @@ xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
61 struct xfs_dir2_data_unused *dup, int *loghead); 62 struct xfs_dir2_data_unused *dup, int *loghead);
62extern void xfs_dir2_data_freescan(struct xfs_mount *mp, 63extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
63 struct xfs_dir2_data_hdr *hdr, int *loghead); 64 struct xfs_dir2_data_hdr *hdr, int *loghead);
64extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, 65extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
65 struct xfs_buf **bpp); 66 struct xfs_buf **bpp);
66extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, 67extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
67 struct xfs_dir2_data_entry *dep); 68 struct xfs_dir2_data_entry *dep);
@@ -77,24 +78,26 @@ extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
77 xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); 78 xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
78 79
79/* xfs_dir2_leaf.c */ 80/* xfs_dir2_leaf.c */
80extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops; 81extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
82extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
81 83
82extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, 84extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
83 xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); 85 xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
84extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, 86extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
85 struct xfs_buf *dbp); 87 struct xfs_buf *dbp);
86extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); 88extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
87extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, 89extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
88 struct xfs_buf *bp); 90 struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp);
89extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp, 91extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
92 struct xfs_dir2_leaf_entry *ents, int *indexp,
90 int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); 93 int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
91extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, 94extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
92 size_t bufsize, xfs_off_t *offset, filldir_t filldir); 95 size_t bufsize, xfs_off_t *offset, filldir_t filldir);
93extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, 96extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
94 struct xfs_buf **bpp, int magic); 97 struct xfs_buf **bpp, __uint16_t magic);
95extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, 98extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
96 int first, int last); 99 int first, int last);
97extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, 100extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp,
98 struct xfs_buf *bp); 101 struct xfs_buf *bp);
99extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); 102extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
100extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); 103extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
@@ -104,11 +107,18 @@ extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
104extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, 107extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
105 struct xfs_buf *lbp, xfs_dir2_db_t db); 108 struct xfs_buf *lbp, xfs_dir2_db_t db);
106extern struct xfs_dir2_leaf_entry * 109extern struct xfs_dir2_leaf_entry *
107xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, 110xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
108 int lowstale, int highstale, 111 struct xfs_dir2_leaf_entry *ents, int index, int compact,
109 int *lfloglow, int *lfloghigh); 112 int lowstale, int highstale, int *lfloglow, int *lfloghigh);
110extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); 113extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
111 114
115extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
116 struct xfs_dir2_leaf *from);
117extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to,
118 struct xfs_dir3_icleaf_hdr *from);
119extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp,
120 struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
121
112/* xfs_dir2_node.c */ 122/* xfs_dir2_node.c */
113extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, 123extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
114 struct xfs_buf *lbp); 124 struct xfs_buf *lbp);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 1b9fc3ec7e4b..6157424dbf8f 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -278,7 +278,7 @@ xfs_dir2_block_to_sf(
278 * Set up to loop over the block's entries. 278 * Set up to loop over the block's entries.
279 */ 279 */
280 btp = xfs_dir2_block_tail_p(mp, hdr); 280 btp = xfs_dir2_block_tail_p(mp, hdr);
281 ptr = (char *)(hdr + 1); 281 ptr = (char *)xfs_dir3_data_entry_p(hdr);
282 endptr = (char *)xfs_dir2_block_leaf_p(btp); 282 endptr = (char *)xfs_dir2_block_leaf_p(btp);
283 sfep = xfs_dir2_sf_firstentry(sfp); 283 sfep = xfs_dir2_sf_firstentry(sfp);
284 /* 284 /*
@@ -535,7 +535,7 @@ xfs_dir2_sf_addname_hard(
535 * to insert the new entry. 535 * to insert the new entry.
536 * If it's going to end up at the end then oldsfep will point there. 536 * If it's going to end up at the end then oldsfep will point there.
537 */ 537 */
538 for (offset = XFS_DIR2_DATA_FIRST_OFFSET, 538 for (offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount),
539 oldsfep = xfs_dir2_sf_firstentry(oldsfp), 539 oldsfep = xfs_dir2_sf_firstentry(oldsfp),
540 add_datasize = xfs_dir2_data_entsize(args->namelen), 540 add_datasize = xfs_dir2_data_entsize(args->namelen),
541 eof = (char *)oldsfep == &buf[old_isize]; 541 eof = (char *)oldsfep == &buf[old_isize];
@@ -617,7 +617,7 @@ xfs_dir2_sf_addname_pick(
617 617
618 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 618 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
619 size = xfs_dir2_data_entsize(args->namelen); 619 size = xfs_dir2_data_entsize(args->namelen);
620 offset = XFS_DIR2_DATA_FIRST_OFFSET; 620 offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
621 sfep = xfs_dir2_sf_firstentry(sfp); 621 sfep = xfs_dir2_sf_firstentry(sfp);
622 holefit = 0; 622 holefit = 0;
623 /* 623 /*
@@ -688,7 +688,7 @@ xfs_dir2_sf_check(
688 dp = args->dp; 688 dp = args->dp;
689 689
690 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 690 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
691 offset = XFS_DIR2_DATA_FIRST_OFFSET; 691 offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount);
692 ino = xfs_dir2_sf_get_parent_ino(sfp); 692 ino = xfs_dir2_sf_get_parent_ino(sfp);
693 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 693 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
694 694
@@ -812,9 +812,9 @@ xfs_dir2_sf_getdents(
812 * mp->m_dirdatablk. 812 * mp->m_dirdatablk.
813 */ 813 */
814 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 814 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
815 XFS_DIR2_DATA_DOT_OFFSET); 815 XFS_DIR3_DATA_DOT_OFFSET(mp));
816 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 816 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
817 XFS_DIR2_DATA_DOTDOT_OFFSET); 817 XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
818 818
819 /* 819 /*
820 * Put . entry unless we're starting past it. 820 * Put . entry unless we're starting past it.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 8025eb23ad72..a41f8bf1da37 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -36,6 +36,7 @@
36#include "xfs_trans_space.h" 36#include "xfs_trans_space.h"
37#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
38#include "xfs_qm.h" 38#include "xfs_qm.h"
39#include "xfs_cksum.h"
39#include "xfs_trace.h" 40#include "xfs_trace.h"
40 41
41/* 42/*
@@ -85,17 +86,23 @@ xfs_qm_dqdestroy(
85 */ 86 */
86void 87void
87xfs_qm_adjust_dqlimits( 88xfs_qm_adjust_dqlimits(
88 xfs_mount_t *mp, 89 struct xfs_mount *mp,
89 xfs_disk_dquot_t *d) 90 struct xfs_dquot *dq)
90{ 91{
91 xfs_quotainfo_t *q = mp->m_quotainfo; 92 struct xfs_quotainfo *q = mp->m_quotainfo;
93 struct xfs_disk_dquot *d = &dq->q_core;
94 int prealloc = 0;
92 95
93 ASSERT(d->d_id); 96 ASSERT(d->d_id);
94 97
95 if (q->qi_bsoftlimit && !d->d_blk_softlimit) 98 if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
96 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); 99 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
97 if (q->qi_bhardlimit && !d->d_blk_hardlimit) 100 prealloc = 1;
101 }
102 if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
98 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); 103 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
104 prealloc = 1;
105 }
99 if (q->qi_isoftlimit && !d->d_ino_softlimit) 106 if (q->qi_isoftlimit && !d->d_ino_softlimit)
100 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); 107 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
101 if (q->qi_ihardlimit && !d->d_ino_hardlimit) 108 if (q->qi_ihardlimit && !d->d_ino_hardlimit)
@@ -104,6 +111,9 @@ xfs_qm_adjust_dqlimits(
104 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); 111 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
105 if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) 112 if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
106 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); 113 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
114
115 if (prealloc)
116 xfs_dquot_set_prealloc_limits(dq);
107} 117}
108 118
109/* 119/*
@@ -239,6 +249,8 @@ xfs_qm_init_dquot_blk(
239 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 249 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
240 d->dd_diskdq.d_id = cpu_to_be32(curid); 250 d->dd_diskdq.d_id = cpu_to_be32(curid);
241 d->dd_diskdq.d_flags = type; 251 d->dd_diskdq.d_flags = type;
252 if (xfs_sb_version_hascrc(&mp->m_sb))
253 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
242 } 254 }
243 255
244 xfs_trans_dquot_buf(tp, bp, 256 xfs_trans_dquot_buf(tp, bp,
@@ -248,25 +260,113 @@ xfs_qm_init_dquot_blk(
248 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); 260 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
249} 261}
250 262
251static void 263/*
264 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
265 * watermarks correspond to the soft and hard limits by default. If a soft limit
266 * is not specified, we use 95% of the hard limit.
267 */
268void
269xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
270{
271 __uint64_t space;
272
273 dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
274 dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
275 if (!dqp->q_prealloc_lo_wmark) {
276 dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
277 do_div(dqp->q_prealloc_lo_wmark, 100);
278 dqp->q_prealloc_lo_wmark *= 95;
279 }
280
281 space = dqp->q_prealloc_hi_wmark;
282
283 do_div(space, 100);
284 dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
285 dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
286 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
287}
288
289STATIC void
290xfs_dquot_buf_calc_crc(
291 struct xfs_mount *mp,
292 struct xfs_buf *bp)
293{
294 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
295 int i;
296
297 if (!xfs_sb_version_hascrc(&mp->m_sb))
298 return;
299
300 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
301 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
302 offsetof(struct xfs_dqblk, dd_crc));
303 }
304}
305
306STATIC bool
307xfs_dquot_buf_verify_crc(
308 struct xfs_mount *mp,
309 struct xfs_buf *bp)
310{
311 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
312 int ndquots;
313 int i;
314
315 if (!xfs_sb_version_hascrc(&mp->m_sb))
316 return true;
317
318 /*
319 * if we are in log recovery, the quota subsystem has not been
320 * initialised so we have no quotainfo structure. In that case, we need
321 * to manually calculate the number of dquots in the buffer.
322 */
323 if (mp->m_quotainfo)
324 ndquots = mp->m_quotainfo->qi_dqperchunk;
325 else
326 ndquots = xfs_qm_calc_dquots_per_chunk(mp,
327 XFS_BB_TO_FSB(mp, bp->b_length));
328
329 for (i = 0; i < ndquots; i++, d++) {
330 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
331 offsetof(struct xfs_dqblk, dd_crc)))
332 return false;
333 if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
334 return false;
335 }
336
337 return true;
338}
339
340STATIC bool
252xfs_dquot_buf_verify( 341xfs_dquot_buf_verify(
342 struct xfs_mount *mp,
253 struct xfs_buf *bp) 343 struct xfs_buf *bp)
254{ 344{
255 struct xfs_mount *mp = bp->b_target->bt_mount;
256 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 345 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
257 struct xfs_disk_dquot *ddq;
258 xfs_dqid_t id = 0; 346 xfs_dqid_t id = 0;
347 int ndquots;
259 int i; 348 int i;
260 349
261 /* 350 /*
351 * if we are in log recovery, the quota subsystem has not been
352 * initialised so we have no quotainfo structure. In that case, we need
353 * to manually calculate the number of dquots in the buffer.
354 */
355 if (mp->m_quotainfo)
356 ndquots = mp->m_quotainfo->qi_dqperchunk;
357 else
358 ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length);
359
360 /*
262 * On the first read of the buffer, verify that each dquot is valid. 361 * On the first read of the buffer, verify that each dquot is valid.
263 * We don't know what the id of the dquot is supposed to be, just that 362 * We don't know what the id of the dquot is supposed to be, just that
264 * they should be increasing monotonically within the buffer. If the 363 * they should be increasing monotonically within the buffer. If the
265 * first id is corrupt, then it will fail on the second dquot in the 364 * first id is corrupt, then it will fail on the second dquot in the
266 * buffer so corruptions could point to the wrong dquot in this case. 365 * buffer so corruptions could point to the wrong dquot in this case.
267 */ 366 */
268 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { 367 for (i = 0; i < ndquots; i++) {
269 int error; 368 struct xfs_disk_dquot *ddq;
369 int error;
270 370
271 ddq = &d[i].dd_diskdq; 371 ddq = &d[i].dd_diskdq;
272 372
@@ -274,27 +374,37 @@ xfs_dquot_buf_verify(
274 id = be32_to_cpu(ddq->d_id); 374 id = be32_to_cpu(ddq->d_id);
275 375
276 error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, 376 error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
277 "xfs_dquot_read_verify"); 377 "xfs_dquot_buf_verify");
278 if (error) { 378 if (error)
279 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d); 379 return false;
280 xfs_buf_ioerror(bp, EFSCORRUPTED);
281 break;
282 }
283 } 380 }
381 return true;
284} 382}
285 383
286static void 384static void
287xfs_dquot_buf_read_verify( 385xfs_dquot_buf_read_verify(
288 struct xfs_buf *bp) 386 struct xfs_buf *bp)
289{ 387{
290 xfs_dquot_buf_verify(bp); 388 struct xfs_mount *mp = bp->b_target->bt_mount;
389
390 if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
391 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
392 xfs_buf_ioerror(bp, EFSCORRUPTED);
393 }
291} 394}
292 395
293void 396void
294xfs_dquot_buf_write_verify( 397xfs_dquot_buf_write_verify(
295 struct xfs_buf *bp) 398 struct xfs_buf *bp)
296{ 399{
297 xfs_dquot_buf_verify(bp); 400 struct xfs_mount *mp = bp->b_target->bt_mount;
401
402 if (!xfs_dquot_buf_verify(mp, bp)) {
403 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
404 xfs_buf_ioerror(bp, EFSCORRUPTED);
405 return;
406 }
407 xfs_dquot_buf_calc_crc(mp, bp);
298} 408}
299 409
300const struct xfs_buf_ops xfs_dquot_buf_ops = { 410const struct xfs_buf_ops xfs_dquot_buf_ops = {
@@ -648,6 +758,9 @@ xfs_qm_dqread(
648 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); 758 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
649 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); 759 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
650 760
761 /* initialize the dquot speculative prealloc thresholds */
762 xfs_dquot_set_prealloc_limits(dqp);
763
651 /* Mark the buf so that this will stay incore a little longer */ 764 /* Mark the buf so that this will stay incore a little longer */
652 xfs_buf_set_ref(bp, XFS_DQUOT_REF); 765 xfs_buf_set_ref(bp, XFS_DQUOT_REF);
653 766
@@ -1035,6 +1148,17 @@ xfs_qm_dqflush(
1035 &dqp->q_logitem.qli_item.li_lsn); 1148 &dqp->q_logitem.qli_item.li_lsn);
1036 1149
1037 /* 1150 /*
1151 * copy the lsn into the on-disk dquot now while we have the in memory
1152 * dquot here. This can't be done later in the write verifier as we
1153 * can't get access to the log item at that point in time.
1154 */
1155 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1156 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
1157
1158 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
1159 }
1160
1161 /*
1038 * Attach an iodone routine so that we can remove this dquot from the 1162 * Attach an iodone routine so that we can remove this dquot from the
1039 * AIL and release the flush lock once the dquot is synced to disk. 1163 * AIL and release the flush lock once the dquot is synced to disk.
1040 */ 1164 */
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index c694a8469c4a..4f0ebfc43cc9 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -32,6 +32,13 @@
32struct xfs_mount; 32struct xfs_mount;
33struct xfs_trans; 33struct xfs_trans;
34 34
35enum {
36 XFS_QLOWSP_1_PCNT = 0,
37 XFS_QLOWSP_3_PCNT,
38 XFS_QLOWSP_5_PCNT,
39 XFS_QLOWSP_MAX
40};
41
35/* 42/*
36 * The incore dquot structure 43 * The incore dquot structure
37 */ 44 */
@@ -51,6 +58,9 @@ typedef struct xfs_dquot {
51 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ 58 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
52 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ 59 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
53 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 60 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
61 xfs_qcnt_t q_prealloc_lo_wmark;/* prealloc throttle wmark */
62 xfs_qcnt_t q_prealloc_hi_wmark;/* prealloc disabled wmark */
63 int64_t q_low_space[XFS_QLOWSP_MAX];
54 struct mutex q_qlock; /* quota lock */ 64 struct mutex q_qlock; /* quota lock */
55 struct completion q_flush; /* flush completion queue */ 65 struct completion q_flush; /* flush completion queue */
56 atomic_t q_pincount; /* dquot pin count */ 66 atomic_t q_pincount; /* dquot pin count */
@@ -145,14 +155,16 @@ extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
145extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 155extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
146extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 156extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
147 xfs_disk_dquot_t *); 157 xfs_disk_dquot_t *);
148extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, 158extern void xfs_qm_adjust_dqlimits(struct xfs_mount *,
149 xfs_disk_dquot_t *); 159 struct xfs_dquot *);
150extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, 160extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
151 xfs_dqid_t, uint, uint, xfs_dquot_t **); 161 xfs_dqid_t, uint, uint, xfs_dquot_t **);
152extern void xfs_qm_dqput(xfs_dquot_t *); 162extern void xfs_qm_dqput(xfs_dquot_t *);
153 163
154extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); 164extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
155 165
166extern void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
167
156static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) 168static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
157{ 169{
158 xfs_dqlock(dqp); 170 xfs_dqlock(dqp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 610456054dc2..35d3f5b041dd 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,7 +66,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
66 int i; 66 int i;
67 int64_t fsid; 67 int64_t fsid;
68 68
69 if (random32() % randfactor) 69 if (prandom_u32() % randfactor)
70 return 0; 70 return 0;
71 71
72 memcpy(&fsid, fsidp, sizeof(xfs_fsid_t)); 72 memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
@@ -178,7 +178,7 @@ xfs_corruption_error(
178 inst_t *ra) 178 inst_t *ra)
179{ 179{
180 if (level <= xfs_error_level) 180 if (level <= xfs_error_level)
181 xfs_hex_dump(p, 16); 181 xfs_hex_dump(p, 64);
182 xfs_error_report(tag, level, mp, filename, linenum, ra); 182 xfs_error_report(tag, level, mp, filename, linenum, ra);
183 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 183 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
184} 184}
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index feb36d7551ae..c0f375087efc 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -50,9 +50,8 @@ xfs_efi_item_free(
50 * Freeing the efi requires that we remove it from the AIL if it has already 50 * Freeing the efi requires that we remove it from the AIL if it has already
51 * been placed there. However, the EFI may not yet have been placed in the AIL 51 * been placed there. However, the EFI may not yet have been placed in the AIL
52 * when called by xfs_efi_release() from EFD processing due to the ordering of 52 * when called by xfs_efi_release() from EFD processing due to the ordering of
53 * committed vs unpin operations in bulk insert operations. Hence the 53 * committed vs unpin operations in bulk insert operations. Hence the reference
54 * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees 54 * count to ensure only the last caller frees the EFI.
55 * the EFI.
56 */ 55 */
57STATIC void 56STATIC void
58__xfs_efi_release( 57__xfs_efi_release(
@@ -60,7 +59,7 @@ __xfs_efi_release(
60{ 59{
61 struct xfs_ail *ailp = efip->efi_item.li_ailp; 60 struct xfs_ail *ailp = efip->efi_item.li_ailp;
62 61
63 if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { 62 if (atomic_dec_and_test(&efip->efi_refcount)) {
64 spin_lock(&ailp->xa_lock); 63 spin_lock(&ailp->xa_lock);
65 /* xfs_trans_ail_delete() drops the AIL lock. */ 64 /* xfs_trans_ail_delete() drops the AIL lock. */
66 xfs_trans_ail_delete(ailp, &efip->efi_item, 65 xfs_trans_ail_delete(ailp, &efip->efi_item,
@@ -126,8 +125,8 @@ xfs_efi_item_pin(
126 * which the EFI is manipulated during a transaction. If we are being asked to 125 * which the EFI is manipulated during a transaction. If we are being asked to
127 * remove the EFI it's because the transaction has been cancelled and by 126 * remove the EFI it's because the transaction has been cancelled and by
128 * definition that means the EFI cannot be in the AIL so remove it from the 127 * definition that means the EFI cannot be in the AIL so remove it from the
129 * transaction and free it. Otherwise coordinate with xfs_efi_release() (via 128 * transaction and free it. Otherwise coordinate with xfs_efi_release()
130 * XFS_EFI_COMMITTED) to determine who gets to free the EFI. 129 * to determine who gets to free the EFI.
131 */ 130 */
132STATIC void 131STATIC void
133xfs_efi_item_unpin( 132xfs_efi_item_unpin(
@@ -171,19 +170,13 @@ xfs_efi_item_unlock(
171 170
172/* 171/*
173 * The EFI is logged only once and cannot be moved in the log, so simply return 172 * The EFI is logged only once and cannot be moved in the log, so simply return
174 * the lsn at which it's been logged. For bulk transaction committed 173 * the lsn at which it's been logged.
175 * processing, the EFI may be processed but not yet unpinned prior to the EFD
176 * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
177 * when processing the EFD.
178 */ 174 */
179STATIC xfs_lsn_t 175STATIC xfs_lsn_t
180xfs_efi_item_committed( 176xfs_efi_item_committed(
181 struct xfs_log_item *lip, 177 struct xfs_log_item *lip,
182 xfs_lsn_t lsn) 178 xfs_lsn_t lsn)
183{ 179{
184 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
185
186 set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
187 return lsn; 180 return lsn;
188} 181}
189 182
@@ -241,6 +234,7 @@ xfs_efi_init(
241 efip->efi_format.efi_nextents = nextents; 234 efip->efi_format.efi_nextents = nextents;
242 efip->efi_format.efi_id = (__psint_t)(void*)efip; 235 efip->efi_format.efi_id = (__psint_t)(void*)efip;
243 atomic_set(&efip->efi_next_extent, 0); 236 atomic_set(&efip->efi_next_extent, 0);
237 atomic_set(&efip->efi_refcount, 2);
244 238
245 return efip; 239 return efip;
246} 240}
@@ -310,8 +304,13 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
310 uint nextents) 304 uint nextents)
311{ 305{
312 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); 306 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
313 if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) 307 if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
314 __xfs_efi_release(efip); 308 __xfs_efi_release(efip);
309
310 /* recovery needs us to drop the EFI reference, too */
311 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
312 __xfs_efi_release(efip);
313 }
315} 314}
316 315
317static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) 316static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 375f68e42531..432222418c56 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -114,16 +114,20 @@ typedef struct xfs_efd_log_format_64 {
114 * Define EFI flag bits. Manipulated by set/clear/test_bit operators. 114 * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
115 */ 115 */
116#define XFS_EFI_RECOVERED 1 116#define XFS_EFI_RECOVERED 1
117#define XFS_EFI_COMMITTED 2
118 117
119/* 118/*
120 * This is the "extent free intention" log item. It is used 119 * This is the "extent free intention" log item. It is used to log the fact
121 * to log the fact that some extents need to be free. It is 120 * that some extents need to be free. It is used in conjunction with the
122 * used in conjunction with the "extent free done" log item 121 * "extent free done" log item described below.
123 * described below. 122 *
123 * The EFI is reference counted so that it is not freed prior to both the EFI
124 * and EFD being committed and unpinned. This ensures that when the last
125 * reference goes away the EFI will always be in the AIL as it has been
126 * unpinned, regardless of whether the EFD is processed before or after the EFI.
124 */ 127 */
125typedef struct xfs_efi_log_item { 128typedef struct xfs_efi_log_item {
126 xfs_log_item_t efi_item; 129 xfs_log_item_t efi_item;
130 atomic_t efi_refcount;
127 atomic_t efi_next_extent; 131 atomic_t efi_next_extent;
128 unsigned long efi_flags; /* misc flags */ 132 unsigned long efi_flags; /* misc flags */
129 xfs_efi_log_format_t efi_format; 133 xfs_efi_log_format_t efi_format;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3800128d2171..054d60c0ac57 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -890,7 +890,7 @@ xfs_dir_open(
890 */ 890 */
891 mode = xfs_ilock_map_shared(ip); 891 mode = xfs_ilock_map_shared(ip);
892 if (ip->i_d.di_nextents > 0) 892 if (ip->i_d.di_nextents > 0)
893 xfs_dir2_data_readahead(NULL, ip, 0, -1); 893 xfs_dir3_data_readahead(NULL, ip, 0, -1);
894 xfs_iunlock(ip, mode); 894 xfs_iunlock(ip, mode);
895 return 0; 895 return 0;
896} 896}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2866b8c78b7a..87595b211da1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -247,6 +247,9 @@ xfs_growfs_data_private(
247 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); 247 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
248 agf->agf_freeblks = cpu_to_be32(tmpsize); 248 agf->agf_freeblks = cpu_to_be32(tmpsize);
249 agf->agf_longest = cpu_to_be32(tmpsize); 249 agf->agf_longest = cpu_to_be32(tmpsize);
250 if (xfs_sb_version_hascrc(&mp->m_sb))
251 uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
252
250 error = xfs_bwrite(bp); 253 error = xfs_bwrite(bp);
251 xfs_buf_relse(bp); 254 xfs_buf_relse(bp);
252 if (error) 255 if (error)
@@ -265,6 +268,11 @@ xfs_growfs_data_private(
265 } 268 }
266 269
267 agfl = XFS_BUF_TO_AGFL(bp); 270 agfl = XFS_BUF_TO_AGFL(bp);
271 if (xfs_sb_version_hascrc(&mp->m_sb)) {
272 agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
273 agfl->agfl_seqno = cpu_to_be32(agno);
274 uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
275 }
268 for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) 276 for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
269 agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); 277 agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
270 278
@@ -296,8 +304,11 @@ xfs_growfs_data_private(
296 agi->agi_freecount = 0; 304 agi->agi_freecount = 0;
297 agi->agi_newino = cpu_to_be32(NULLAGINO); 305 agi->agi_newino = cpu_to_be32(NULLAGINO);
298 agi->agi_dirino = cpu_to_be32(NULLAGINO); 306 agi->agi_dirino = cpu_to_be32(NULLAGINO);
307 if (xfs_sb_version_hascrc(&mp->m_sb))
308 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
299 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) 309 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
300 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 310 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
311
301 error = xfs_bwrite(bp); 312 error = xfs_bwrite(bp);
302 xfs_buf_relse(bp); 313 xfs_buf_relse(bp);
303 if (error) 314 if (error)
@@ -316,7 +327,13 @@ xfs_growfs_data_private(
316 goto error0; 327 goto error0;
317 } 328 }
318 329
319 xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0); 330 if (xfs_sb_version_hascrc(&mp->m_sb))
331 xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
332 agno, XFS_BTREE_CRC_BLOCKS);
333 else
334 xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
335 agno, 0);
336
320 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 337 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
321 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 338 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
322 arec->ar_blockcount = cpu_to_be32( 339 arec->ar_blockcount = cpu_to_be32(
@@ -339,7 +356,13 @@ xfs_growfs_data_private(
339 goto error0; 356 goto error0;
340 } 357 }
341 358
342 xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0); 359 if (xfs_sb_version_hascrc(&mp->m_sb))
360 xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
361 agno, XFS_BTREE_CRC_BLOCKS);
362 else
363 xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
364 agno, 0);
365
343 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 366 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
344 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 367 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
345 arec->ar_blockcount = cpu_to_be32( 368 arec->ar_blockcount = cpu_to_be32(
@@ -363,7 +386,12 @@ xfs_growfs_data_private(
363 goto error0; 386 goto error0;
364 } 387 }
365 388
366 xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0); 389 if (xfs_sb_version_hascrc(&mp->m_sb))
390 xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
391 agno, XFS_BTREE_CRC_BLOCKS);
392 else
393 xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
394 agno, 0);
367 395
368 error = xfs_bwrite(bp); 396 error = xfs_bwrite(bp);
369 xfs_buf_relse(bp); 397 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 515bf71ce01c..c8f5ae1debf2 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -36,6 +36,8 @@
36#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_cksum.h"
40#include "xfs_buf_item.h"
39 41
40 42
41/* 43/*
@@ -165,6 +167,7 @@ xfs_ialloc_inode_init(
165 int version; 167 int version;
166 int i, j; 168 int i, j;
167 xfs_daddr_t d; 169 xfs_daddr_t d;
170 xfs_ino_t ino = 0;
168 171
169 /* 172 /*
170 * Loop over the new block(s), filling in the inodes. 173 * Loop over the new block(s), filling in the inodes.
@@ -183,13 +186,29 @@ xfs_ialloc_inode_init(
183 } 186 }
184 187
185 /* 188 /*
186 * Figure out what version number to use in the inodes we create. 189 * Figure out what version number to use in the inodes we create. If
187 * If the superblock version has caught up to the one that supports 190 * the superblock version has caught up to the one that supports the new
188 * the new inode format, then use the new inode version. Otherwise 191 * inode format, then use the new inode version. Otherwise use the old
189 * use the old version so that old kernels will continue to be 192 * version so that old kernels will continue to be able to use the file
190 * able to use the file system. 193 * system.
194 *
195 * For v3 inodes, we also need to write the inode number into the inode,
196 * so calculate the first inode number of the chunk here as
197 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
198 * across multiple filesystem blocks (such as a cluster) and so cannot
199 * be used in the cluster buffer loop below.
200 *
201 * Further, because we are writing the inode directly into the buffer
202 * and calculating a CRC on the entire inode, we have ot log the entire
203 * inode so that the entire range the CRC covers is present in the log.
204 * That means for v3 inode we log the entire buffer rather than just the
205 * inode cores.
191 */ 206 */
192 if (xfs_sb_version_hasnlink(&mp->m_sb)) 207 if (xfs_sb_version_hascrc(&mp->m_sb)) {
208 version = 3;
209 ino = XFS_AGINO_TO_INO(mp, agno,
210 XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
211 } else if (xfs_sb_version_hasnlink(&mp->m_sb))
193 version = 2; 212 version = 2;
194 else 213 else
195 version = 1; 214 version = 1;
@@ -212,17 +231,32 @@ xfs_ialloc_inode_init(
212 * individual transactions causing a lot of log traffic. 231 * individual transactions causing a lot of log traffic.
213 */ 232 */
214 fbuf->b_ops = &xfs_inode_buf_ops; 233 fbuf->b_ops = &xfs_inode_buf_ops;
215 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); 234 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
216 for (i = 0; i < ninodes; i++) { 235 for (i = 0; i < ninodes; i++) {
217 int ioffset = i << mp->m_sb.sb_inodelog; 236 int ioffset = i << mp->m_sb.sb_inodelog;
218 uint isize = sizeof(struct xfs_dinode); 237 uint isize = xfs_dinode_size(version);
219 238
220 free = xfs_make_iptr(mp, fbuf, i); 239 free = xfs_make_iptr(mp, fbuf, i);
221 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 240 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
222 free->di_version = version; 241 free->di_version = version;
223 free->di_gen = cpu_to_be32(gen); 242 free->di_gen = cpu_to_be32(gen);
224 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 243 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
225 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); 244
245 if (version == 3) {
246 free->di_ino = cpu_to_be64(ino);
247 ino++;
248 uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
249 xfs_dinode_calc_crc(mp, free);
250 } else {
251 /* just log the inode core */
252 xfs_trans_log_buf(tp, fbuf, ioffset,
253 ioffset + isize - 1);
254 }
255 }
256 if (version == 3) {
257 /* need to log the entire buffer */
258 xfs_trans_log_buf(tp, fbuf, 0,
259 BBTOB(fbuf->b_length) - 1);
226 } 260 }
227 xfs_trans_inode_alloc_buf(tp, fbuf); 261 xfs_trans_inode_alloc_buf(tp, fbuf);
228 } 262 }
@@ -369,7 +403,7 @@ xfs_ialloc_ag_alloc(
369 * number from being easily guessable. 403 * number from being easily guessable.
370 */ 404 */
371 error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, 405 error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
372 args.len, random32()); 406 args.len, prandom_u32());
373 407
374 if (error) 408 if (error)
375 return error; 409 return error;
@@ -1453,6 +1487,7 @@ xfs_ialloc_log_agi(
1453 /* 1487 /*
1454 * Log the allocation group inode header buffer. 1488 * Log the allocation group inode header buffer.
1455 */ 1489 */
1490 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
1456 xfs_trans_log_buf(tp, bp, first, last); 1491 xfs_trans_log_buf(tp, bp, first, last);
1457} 1492}
1458 1493
@@ -1470,19 +1505,23 @@ xfs_check_agi_unlinked(
1470#define xfs_check_agi_unlinked(agi) 1505#define xfs_check_agi_unlinked(agi)
1471#endif 1506#endif
1472 1507
1473static void 1508static bool
1474xfs_agi_verify( 1509xfs_agi_verify(
1475 struct xfs_buf *bp) 1510 struct xfs_buf *bp)
1476{ 1511{
1477 struct xfs_mount *mp = bp->b_target->bt_mount; 1512 struct xfs_mount *mp = bp->b_target->bt_mount;
1478 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 1513 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
1479 int agi_ok;
1480 1514
1515 if (xfs_sb_version_hascrc(&mp->m_sb) &&
1516 !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
1517 return false;
1481 /* 1518 /*
1482 * Validate the magic number of the agi block. 1519 * Validate the magic number of the agi block.
1483 */ 1520 */
1484 agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && 1521 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
1485 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); 1522 return false;
1523 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
1524 return false;
1486 1525
1487 /* 1526 /*
1488 * during growfs operations, the perag is not fully initialised, 1527 * during growfs operations, the perag is not fully initialised,
@@ -1490,30 +1529,52 @@ xfs_agi_verify(
1490 * use it by using uncached buffers that don't have the perag attached 1529 * use it by using uncached buffers that don't have the perag attached
1491 * so we can detect and avoid this problem. 1530 * so we can detect and avoid this problem.
1492 */ 1531 */
1493 if (bp->b_pag) 1532 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
1494 agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) == 1533 return false;
1495 bp->b_pag->pag_agno;
1496 1534
1497 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1498 XFS_RANDOM_IALLOC_READ_AGI))) {
1499 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi);
1500 xfs_buf_ioerror(bp, EFSCORRUPTED);
1501 }
1502 xfs_check_agi_unlinked(agi); 1535 xfs_check_agi_unlinked(agi);
1536 return true;
1503} 1537}
1504 1538
1505static void 1539static void
1506xfs_agi_read_verify( 1540xfs_agi_read_verify(
1507 struct xfs_buf *bp) 1541 struct xfs_buf *bp)
1508{ 1542{
1509 xfs_agi_verify(bp); 1543 struct xfs_mount *mp = bp->b_target->bt_mount;
1544 int agi_ok = 1;
1545
1546 if (xfs_sb_version_hascrc(&mp->m_sb))
1547 agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
1548 offsetof(struct xfs_agi, agi_crc));
1549 agi_ok = agi_ok && xfs_agi_verify(bp);
1550
1551 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1552 XFS_RANDOM_IALLOC_READ_AGI))) {
1553 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1554 xfs_buf_ioerror(bp, EFSCORRUPTED);
1555 }
1510} 1556}
1511 1557
1512static void 1558static void
1513xfs_agi_write_verify( 1559xfs_agi_write_verify(
1514 struct xfs_buf *bp) 1560 struct xfs_buf *bp)
1515{ 1561{
1516 xfs_agi_verify(bp); 1562 struct xfs_mount *mp = bp->b_target->bt_mount;
1563 struct xfs_buf_log_item *bip = bp->b_fspriv;
1564
1565 if (!xfs_agi_verify(bp)) {
1566 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1567 xfs_buf_ioerror(bp, EFSCORRUPTED);
1568 return;
1569 }
1570
1571 if (!xfs_sb_version_hascrc(&mp->m_sb))
1572 return;
1573
1574 if (bip)
1575 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
1576 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
1577 offsetof(struct xfs_agi, agi_crc));
1517} 1578}
1518 1579
1519const struct xfs_buf_ops xfs_agi_buf_ops = { 1580const struct xfs_buf_ops xfs_agi_buf_ops = {
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index bec344b36507..c82ac8867421 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -34,6 +34,7 @@
34#include "xfs_alloc.h" 34#include "xfs_alloc.h"
35#include "xfs_error.h" 35#include "xfs_error.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_cksum.h"
37 38
38 39
39STATIC int 40STATIC int
@@ -182,52 +183,88 @@ xfs_inobt_key_diff(
182 cur->bc_rec.i.ir_startino; 183 cur->bc_rec.i.ir_startino;
183} 184}
184 185
185void 186static int
186xfs_inobt_verify( 187xfs_inobt_verify(
187 struct xfs_buf *bp) 188 struct xfs_buf *bp)
188{ 189{
189 struct xfs_mount *mp = bp->b_target->bt_mount; 190 struct xfs_mount *mp = bp->b_target->bt_mount;
190 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 191 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
192 struct xfs_perag *pag = bp->b_pag;
191 unsigned int level; 193 unsigned int level;
192 int sblock_ok; /* block passes checks */
193 194
194 /* magic number and level verification */ 195 /*
195 level = be16_to_cpu(block->bb_level); 196 * During growfs operations, we can't verify the exact owner as the
196 sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) && 197 * perag is not fully initialised and hence not attached to the buffer.
197 level < mp->m_in_maxlevels; 198 *
199 * Similarly, during log recovery we will have a perag structure
200 * attached, but the agi information will not yet have been initialised
201 * from the on disk AGI. We don't currently use any of this information,
202 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
203 * ever do.
204 */
205 switch (block->bb_magic) {
206 case cpu_to_be32(XFS_IBT_CRC_MAGIC):
207 if (!xfs_sb_version_hascrc(&mp->m_sb))
208 return false;
209 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
210 return false;
211 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
212 return false;
213 if (pag &&
214 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
215 return false;
216 /* fall through */
217 case cpu_to_be32(XFS_IBT_MAGIC):
218 break;
219 default:
220 return 0;
221 }
198 222
199 /* numrecs verification */ 223 /* numrecs and level verification */
200 sblock_ok = sblock_ok && 224 level = be16_to_cpu(block->bb_level);
201 be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0]; 225 if (level >= mp->m_in_maxlevels)
226 return false;
227 if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
228 return false;
202 229
203 /* sibling pointer verification */ 230 /* sibling pointer verification */
204 sblock_ok = sblock_ok && 231 if (!block->bb_u.s.bb_leftsib ||
205 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 232 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
206 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && 233 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
207 block->bb_u.s.bb_leftsib && 234 return false;
208 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 235 if (!block->bb_u.s.bb_rightsib ||
209 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && 236 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
210 block->bb_u.s.bb_rightsib; 237 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
211 238 return false;
212 if (!sblock_ok) { 239
213 trace_xfs_btree_corrupt(bp, _RET_IP_); 240 return true;
214 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
215 xfs_buf_ioerror(bp, EFSCORRUPTED);
216 }
217} 241}
218 242
219static void 243static void
220xfs_inobt_read_verify( 244xfs_inobt_read_verify(
221 struct xfs_buf *bp) 245 struct xfs_buf *bp)
222{ 246{
223 xfs_inobt_verify(bp); 247 if (!(xfs_btree_sblock_verify_crc(bp) &&
248 xfs_inobt_verify(bp))) {
249 trace_xfs_btree_corrupt(bp, _RET_IP_);
250 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
251 bp->b_target->bt_mount, bp->b_addr);
252 xfs_buf_ioerror(bp, EFSCORRUPTED);
253 }
224} 254}
225 255
226static void 256static void
227xfs_inobt_write_verify( 257xfs_inobt_write_verify(
228 struct xfs_buf *bp) 258 struct xfs_buf *bp)
229{ 259{
230 xfs_inobt_verify(bp); 260 if (!xfs_inobt_verify(bp)) {
261 trace_xfs_btree_corrupt(bp, _RET_IP_);
262 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
263 bp->b_target->bt_mount, bp->b_addr);
264 xfs_buf_ioerror(bp, EFSCORRUPTED);
265 }
266 xfs_btree_sblock_calc_crc(bp);
267
231} 268}
232 269
233const struct xfs_buf_ops xfs_inobt_buf_ops = { 270const struct xfs_buf_ops xfs_inobt_buf_ops = {
@@ -301,6 +338,8 @@ xfs_inobt_init_cursor(
301 cur->bc_blocklog = mp->m_sb.sb_blocklog; 338 cur->bc_blocklog = mp->m_sb.sb_blocklog;
302 339
303 cur->bc_ops = &xfs_inobt_ops; 340 cur->bc_ops = &xfs_inobt_ops;
341 if (xfs_sb_version_hascrc(&mp->m_sb))
342 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
304 343
305 cur->bc_private.a.agbp = agbp; 344 cur->bc_private.a.agbp = agbp;
306 cur->bc_private.a.agno = agno; 345 cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 25c0239a8eab..3ac36b7642e9 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -29,7 +29,8 @@ struct xfs_mount;
29/* 29/*
30 * There is a btree for the inode map per allocation group. 30 * There is a btree for the inode map per allocation group.
31 */ 31 */
32#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ 32#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
33#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
33 34
34typedef __uint64_t xfs_inofree_t; 35typedef __uint64_t xfs_inofree_t;
35#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) 36#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
@@ -76,10 +77,10 @@ typedef __be32 xfs_inobt_ptr_t;
76 77
77/* 78/*
78 * Btree block header size depends on a superblock flag. 79 * Btree block header size depends on a superblock flag.
79 *
80 * (not quite yet, but soon)
81 */ 80 */
82#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN 81#define XFS_INOBT_BLOCK_LEN(mp) \
82 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
83 XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
83 84
84/* 85/*
85 * Record, key, and pointer address macros for btree blocks. 86 * Record, key, and pointer address macros for btree blocks.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4f201656d2d9..558ef4947206 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -44,6 +44,7 @@
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_filestream.h" 45#include "xfs_filestream.h"
46#include "xfs_vnodeops.h" 46#include "xfs_vnodeops.h"
47#include "xfs_cksum.h"
47#include "xfs_trace.h" 48#include "xfs_trace.h"
48#include "xfs_icache.h" 49#include "xfs_icache.h"
49 50
@@ -786,6 +787,7 @@ xfs_iformat_btree(
786 xfs_dinode_t *dip, 787 xfs_dinode_t *dip,
787 int whichfork) 788 int whichfork)
788{ 789{
790 struct xfs_mount *mp = ip->i_mount;
789 xfs_bmdr_block_t *dfp; 791 xfs_bmdr_block_t *dfp;
790 xfs_ifork_t *ifp; 792 xfs_ifork_t *ifp;
791 /* REFERENCED */ 793 /* REFERENCED */
@@ -794,7 +796,7 @@ xfs_iformat_btree(
794 796
795 ifp = XFS_IFORK_PTR(ip, whichfork); 797 ifp = XFS_IFORK_PTR(ip, whichfork);
796 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 798 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
797 size = XFS_BMAP_BROOT_SPACE(dfp); 799 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
798 nrecs = be16_to_cpu(dfp->bb_numrecs); 800 nrecs = be16_to_cpu(dfp->bb_numrecs);
799 801
800 /* 802 /*
@@ -805,14 +807,14 @@ xfs_iformat_btree(
805 * blocks. 807 * blocks.
806 */ 808 */
807 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= 809 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
808 XFS_IFORK_MAXEXT(ip, whichfork) || 810 XFS_IFORK_MAXEXT(ip, whichfork) ||
809 XFS_BMDR_SPACE_CALC(nrecs) > 811 XFS_BMDR_SPACE_CALC(nrecs) >
810 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) || 812 XFS_DFORK_SIZE(dip, mp, whichfork) ||
811 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 813 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
812 xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", 814 xfs_warn(mp, "corrupt inode %Lu (btree).",
813 (unsigned long long) ip->i_ino); 815 (unsigned long long) ip->i_ino);
814 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 816 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
815 ip->i_mount, dip); 817 mp, dip);
816 return XFS_ERROR(EFSCORRUPTED); 818 return XFS_ERROR(EFSCORRUPTED);
817 } 819 }
818 820
@@ -823,8 +825,7 @@ xfs_iformat_btree(
823 * Copy and convert from the on-disk structure 825 * Copy and convert from the on-disk structure
824 * to the in-memory structure. 826 * to the in-memory structure.
825 */ 827 */
826 xfs_bmdr_to_bmbt(ip->i_mount, dfp, 828 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
827 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
828 ifp->if_broot, size); 829 ifp->if_broot, size);
829 ifp->if_flags &= ~XFS_IFEXTENTS; 830 ifp->if_flags &= ~XFS_IFEXTENTS;
830 ifp->if_flags |= XFS_IFBROOT; 831 ifp->if_flags |= XFS_IFBROOT;
@@ -866,6 +867,17 @@ xfs_dinode_from_disk(
866 to->di_dmstate = be16_to_cpu(from->di_dmstate); 867 to->di_dmstate = be16_to_cpu(from->di_dmstate);
867 to->di_flags = be16_to_cpu(from->di_flags); 868 to->di_flags = be16_to_cpu(from->di_flags);
868 to->di_gen = be32_to_cpu(from->di_gen); 869 to->di_gen = be32_to_cpu(from->di_gen);
870
871 if (to->di_version == 3) {
872 to->di_changecount = be64_to_cpu(from->di_changecount);
873 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
874 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
875 to->di_flags2 = be64_to_cpu(from->di_flags2);
876 to->di_ino = be64_to_cpu(from->di_ino);
877 to->di_lsn = be64_to_cpu(from->di_lsn);
878 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
879 uuid_copy(&to->di_uuid, &from->di_uuid);
880 }
869} 881}
870 882
871void 883void
@@ -902,6 +914,17 @@ xfs_dinode_to_disk(
902 to->di_dmstate = cpu_to_be16(from->di_dmstate); 914 to->di_dmstate = cpu_to_be16(from->di_dmstate);
903 to->di_flags = cpu_to_be16(from->di_flags); 915 to->di_flags = cpu_to_be16(from->di_flags);
904 to->di_gen = cpu_to_be32(from->di_gen); 916 to->di_gen = cpu_to_be32(from->di_gen);
917
918 if (from->di_version == 3) {
919 to->di_changecount = cpu_to_be64(from->di_changecount);
920 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
921 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
922 to->di_flags2 = cpu_to_be64(from->di_flags2);
923 to->di_ino = cpu_to_be64(from->di_ino);
924 to->di_lsn = cpu_to_be64(from->di_lsn);
925 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
926 uuid_copy(&to->di_uuid, &from->di_uuid);
927 }
905} 928}
906 929
907STATIC uint 930STATIC uint
@@ -962,6 +985,47 @@ xfs_dic2xflags(
962 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 985 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
963} 986}
964 987
988static bool
989xfs_dinode_verify(
990 struct xfs_mount *mp,
991 struct xfs_inode *ip,
992 struct xfs_dinode *dip)
993{
994 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
995 return false;
996
997 /* only version 3 or greater inodes are extensively verified here */
998 if (dip->di_version < 3)
999 return true;
1000
1001 if (!xfs_sb_version_hascrc(&mp->m_sb))
1002 return false;
1003 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
1004 offsetof(struct xfs_dinode, di_crc)))
1005 return false;
1006 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
1007 return false;
1008 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
1009 return false;
1010 return true;
1011}
1012
1013void
1014xfs_dinode_calc_crc(
1015 struct xfs_mount *mp,
1016 struct xfs_dinode *dip)
1017{
1018 __uint32_t crc;
1019
1020 if (dip->di_version < 3)
1021 return;
1022
1023 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
1024 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
1025 offsetof(struct xfs_dinode, di_crc));
1026 dip->di_crc = xfs_end_cksum(crc);
1027}
1028
965/* 1029/*
966 * Read the disk inode attributes into the in-core inode structure. 1030 * Read the disk inode attributes into the in-core inode structure.
967 */ 1031 */
@@ -990,17 +1054,13 @@ xfs_iread(
990 if (error) 1054 if (error)
991 return error; 1055 return error;
992 1056
993 /* 1057 /* even unallocated inodes are verified */
994 * If we got something that isn't an inode it means someone 1058 if (!xfs_dinode_verify(mp, ip, dip)) {
995 * (nfs or dmi) has a stale handle. 1059 xfs_alert(mp, "%s: validation failed for inode %lld failed",
996 */ 1060 __func__, ip->i_ino);
997 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { 1061
998#ifdef DEBUG 1062 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
999 xfs_alert(mp, 1063 error = XFS_ERROR(EFSCORRUPTED);
1000 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
1001 __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
1002#endif /* DEBUG */
1003 error = XFS_ERROR(EINVAL);
1004 goto out_brelse; 1064 goto out_brelse;
1005 } 1065 }
1006 1066
@@ -1022,10 +1082,20 @@ xfs_iread(
1022 goto out_brelse; 1082 goto out_brelse;
1023 } 1083 }
1024 } else { 1084 } else {
1085 /*
1086 * Partial initialisation of the in-core inode. Just the bits
1087 * that xfs_ialloc won't overwrite or relies on being correct.
1088 */
1025 ip->i_d.di_magic = be16_to_cpu(dip->di_magic); 1089 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
1026 ip->i_d.di_version = dip->di_version; 1090 ip->i_d.di_version = dip->di_version;
1027 ip->i_d.di_gen = be32_to_cpu(dip->di_gen); 1091 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
1028 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 1092 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
1093
1094 if (dip->di_version == 3) {
1095 ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
1096 uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
1097 }
1098
1029 /* 1099 /*
1030 * Make sure to pull in the mode here as well in 1100 * Make sure to pull in the mode here as well in
1031 * case the inode is released without being used. 1101 * case the inode is released without being used.
@@ -1161,6 +1231,7 @@ xfs_ialloc(
1161 xfs_buf_t **ialloc_context, 1231 xfs_buf_t **ialloc_context,
1162 xfs_inode_t **ipp) 1232 xfs_inode_t **ipp)
1163{ 1233{
1234 struct xfs_mount *mp = tp->t_mountp;
1164 xfs_ino_t ino; 1235 xfs_ino_t ino;
1165 xfs_inode_t *ip; 1236 xfs_inode_t *ip;
1166 uint flags; 1237 uint flags;
@@ -1187,7 +1258,7 @@ xfs_ialloc(
1187 * This is because we're setting fields here we need 1258 * This is because we're setting fields here we need
1188 * to prevent others from looking at until we're done. 1259 * to prevent others from looking at until we're done.
1189 */ 1260 */
1190 error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, 1261 error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
1191 XFS_ILOCK_EXCL, &ip); 1262 XFS_ILOCK_EXCL, &ip);
1192 if (error) 1263 if (error)
1193 return error; 1264 return error;
@@ -1208,7 +1279,7 @@ xfs_ialloc(
1208 * the inode version number now. This way we only do the conversion 1279 * the inode version number now. This way we only do the conversion
1209 * here rather than here and in the flush/logging code. 1280 * here rather than here and in the flush/logging code.
1210 */ 1281 */
1211 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 1282 if (xfs_sb_version_hasnlink(&mp->m_sb) &&
1212 ip->i_d.di_version == 1) { 1283 ip->i_d.di_version == 1) {
1213 ip->i_d.di_version = 2; 1284 ip->i_d.di_version = 2;
1214 /* 1285 /*
@@ -1258,6 +1329,19 @@ xfs_ialloc(
1258 ip->i_d.di_dmevmask = 0; 1329 ip->i_d.di_dmevmask = 0;
1259 ip->i_d.di_dmstate = 0; 1330 ip->i_d.di_dmstate = 0;
1260 ip->i_d.di_flags = 0; 1331 ip->i_d.di_flags = 0;
1332
1333 if (ip->i_d.di_version == 3) {
1334 ASSERT(ip->i_d.di_ino == ino);
1335 ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
1336 ip->i_d.di_crc = 0;
1337 ip->i_d.di_changecount = 1;
1338 ip->i_d.di_lsn = 0;
1339 ip->i_d.di_flags2 = 0;
1340 memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
1341 ip->i_d.di_crtime = ip->i_d.di_mtime;
1342 }
1343
1344
1261 flags = XFS_ILOG_CORE; 1345 flags = XFS_ILOG_CORE;
1262 switch (mode & S_IFMT) { 1346 switch (mode & S_IFMT) {
1263 case S_IFIFO: 1347 case S_IFIFO:
@@ -2037,7 +2121,7 @@ xfs_iroot_realloc(
2037 * allocate it now and get out. 2121 * allocate it now and get out.
2038 */ 2122 */
2039 if (ifp->if_broot_bytes == 0) { 2123 if (ifp->if_broot_bytes == 0) {
2040 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2124 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
2041 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 2125 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2042 ifp->if_broot_bytes = (int)new_size; 2126 ifp->if_broot_bytes = (int)new_size;
2043 return; 2127 return;
@@ -2051,9 +2135,9 @@ xfs_iroot_realloc(
2051 */ 2135 */
2052 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 2136 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
2053 new_max = cur_max + rec_diff; 2137 new_max = cur_max + rec_diff;
2054 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2138 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
2055 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 2139 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2056 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2140 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
2057 KM_SLEEP | KM_NOFS); 2141 KM_SLEEP | KM_NOFS);
2058 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2142 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2059 ifp->if_broot_bytes); 2143 ifp->if_broot_bytes);
@@ -2061,7 +2145,7 @@ xfs_iroot_realloc(
2061 (int)new_size); 2145 (int)new_size);
2062 ifp->if_broot_bytes = (int)new_size; 2146 ifp->if_broot_bytes = (int)new_size;
2063 ASSERT(ifp->if_broot_bytes <= 2147 ASSERT(ifp->if_broot_bytes <=
2064 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2148 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
2065 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 2149 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
2066 return; 2150 return;
2067 } 2151 }
@@ -2076,7 +2160,7 @@ xfs_iroot_realloc(
2076 new_max = cur_max + rec_diff; 2160 new_max = cur_max + rec_diff;
2077 ASSERT(new_max >= 0); 2161 ASSERT(new_max >= 0);
2078 if (new_max > 0) 2162 if (new_max > 0)
2079 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2163 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
2080 else 2164 else
2081 new_size = 0; 2165 new_size = 0;
2082 if (new_size > 0) { 2166 if (new_size > 0) {
@@ -2084,7 +2168,8 @@ xfs_iroot_realloc(
2084 /* 2168 /*
2085 * First copy over the btree block header. 2169 * First copy over the btree block header.
2086 */ 2170 */
2087 memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); 2171 memcpy(new_broot, ifp->if_broot,
2172 XFS_BMBT_BLOCK_LEN(ip->i_mount));
2088 } else { 2173 } else {
2089 new_broot = NULL; 2174 new_broot = NULL;
2090 ifp->if_flags &= ~XFS_IFBROOT; 2175 ifp->if_flags &= ~XFS_IFBROOT;
@@ -2114,7 +2199,7 @@ xfs_iroot_realloc(
2114 ifp->if_broot = new_broot; 2199 ifp->if_broot = new_broot;
2115 ifp->if_broot_bytes = (int)new_size; 2200 ifp->if_broot_bytes = (int)new_size;
2116 ASSERT(ifp->if_broot_bytes <= 2201 ASSERT(ifp->if_broot_bytes <=
2117 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2202 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
2118 return; 2203 return;
2119} 2204}
2120 2205
@@ -2427,7 +2512,7 @@ xfs_iflush_fork(
2427 ASSERT(ifp->if_broot != NULL); 2512 ASSERT(ifp->if_broot != NULL);
2428 ASSERT(ifp->if_broot_bytes <= 2513 ASSERT(ifp->if_broot_bytes <=
2429 (XFS_IFORK_SIZE(ip, whichfork) + 2514 (XFS_IFORK_SIZE(ip, whichfork) +
2430 XFS_BROOT_SIZE_ADJ)); 2515 XFS_BROOT_SIZE_ADJ(ip)));
2431 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 2516 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
2432 (xfs_bmdr_block_t *)cp, 2517 (xfs_bmdr_block_t *)cp,
2433 XFS_DFORK_SIZE(dip, mp, whichfork)); 2518 XFS_DFORK_SIZE(dip, mp, whichfork));
@@ -2715,20 +2800,18 @@ abort_out:
2715 2800
2716STATIC int 2801STATIC int
2717xfs_iflush_int( 2802xfs_iflush_int(
2718 xfs_inode_t *ip, 2803 struct xfs_inode *ip,
2719 xfs_buf_t *bp) 2804 struct xfs_buf *bp)
2720{ 2805{
2721 xfs_inode_log_item_t *iip; 2806 struct xfs_inode_log_item *iip = ip->i_itemp;
2722 xfs_dinode_t *dip; 2807 struct xfs_dinode *dip;
2723 xfs_mount_t *mp; 2808 struct xfs_mount *mp = ip->i_mount;
2724 2809
2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2726 ASSERT(xfs_isiflocked(ip)); 2811 ASSERT(xfs_isiflocked(ip));
2727 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2812 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2728 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 2813 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2729 2814 ASSERT(iip != NULL && iip->ili_fields != 0);
2730 iip = ip->i_itemp;
2731 mp = ip->i_mount;
2732 2815
2733 /* set *dip = inode's place in the buffer */ 2816 /* set *dip = inode's place in the buffer */
2734 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2817 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -2789,9 +2872,9 @@ xfs_iflush_int(
2789 } 2872 }
2790 /* 2873 /*
2791 * bump the flush iteration count, used to detect flushes which 2874 * bump the flush iteration count, used to detect flushes which
2792 * postdate a log record during recovery. 2875 * postdate a log record during recovery. This is redundant as we now
2876 * log every change and hence this can't happen. Still, it doesn't hurt.
2793 */ 2877 */
2794
2795 ip->i_d.di_flushiter++; 2878 ip->i_d.di_flushiter++;
2796 2879
2797 /* 2880 /*
@@ -2867,41 +2950,30 @@ xfs_iflush_int(
2867 * need the AIL lock, because it is a 64 bit value that cannot be read 2950 * need the AIL lock, because it is a 64 bit value that cannot be read
2868 * atomically. 2951 * atomically.
2869 */ 2952 */
2870 if (iip != NULL && iip->ili_fields != 0) { 2953 iip->ili_last_fields = iip->ili_fields;
2871 iip->ili_last_fields = iip->ili_fields; 2954 iip->ili_fields = 0;
2872 iip->ili_fields = 0; 2955 iip->ili_logged = 1;
2873 iip->ili_logged = 1;
2874 2956
2875 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2957 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2876 &iip->ili_item.li_lsn); 2958 &iip->ili_item.li_lsn);
2877 2959
2878 /* 2960 /*
2879 * Attach the function xfs_iflush_done to the inode's 2961 * Attach the function xfs_iflush_done to the inode's
2880 * buffer. This will remove the inode from the AIL 2962 * buffer. This will remove the inode from the AIL
2881 * and unlock the inode's flush lock when the inode is 2963 * and unlock the inode's flush lock when the inode is
2882 * completely written to disk. 2964 * completely written to disk.
2883 */ 2965 */
2884 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 2966 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
2885 2967
2886 ASSERT(bp->b_fspriv != NULL); 2968 /* update the lsn in the on disk inode if required */
2887 ASSERT(bp->b_iodone != NULL); 2969 if (ip->i_d.di_version == 3)
2888 } else { 2970 dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
2889 /* 2971
2890 * We're flushing an inode which is not in the AIL and has 2972 /* generate the checksum. */
2891 * not been logged. For this case we can immediately drop 2973 xfs_dinode_calc_crc(mp, dip);
2892 * the inode flush lock because we can avoid the whole
2893 * AIL state thing. It's OK to drop the flush lock now,
2894 * because we've already locked the buffer and to do anything
2895 * you really need both.
2896 */
2897 if (iip != NULL) {
2898 ASSERT(iip->ili_logged == 0);
2899 ASSERT(iip->ili_last_fields == 0);
2900 ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
2901 }
2902 xfs_ifunlock(ip);
2903 }
2904 2974
2975 ASSERT(bp->b_fspriv != NULL);
2976 ASSERT(bp->b_iodone != NULL);
2905 return 0; 2977 return 0;
2906 2978
2907corrupt_out: 2979corrupt_out:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 237e7f6f2ab3..91129794aaec 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -150,13 +150,38 @@ typedef struct xfs_icdinode {
150 __uint16_t di_dmstate; /* DMIG state info */ 150 __uint16_t di_dmstate; /* DMIG state info */
151 __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ 151 __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
152 __uint32_t di_gen; /* generation number */ 152 __uint32_t di_gen; /* generation number */
153
154 /* di_next_unlinked is the only non-core field in the old dinode */
155 xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
156
157 /* start of the extended dinode, writable fields */
158 __uint32_t di_crc; /* CRC of the inode */
159 __uint64_t di_changecount; /* number of attribute changes */
160 xfs_lsn_t di_lsn; /* flush sequence */
161 __uint64_t di_flags2; /* more random flags */
162 __uint8_t di_pad2[16]; /* more padding for future expansion */
163
164 /* fields only written to during inode creation */
165 xfs_ictimestamp_t di_crtime; /* time created */
166 xfs_ino_t di_ino; /* inode number */
167 uuid_t di_uuid; /* UUID of the filesystem */
168
169 /* structure must be padded to 64 bit alignment */
153} xfs_icdinode_t; 170} xfs_icdinode_t;
154 171
172static inline uint xfs_icdinode_size(int version)
173{
174 if (version == 3)
175 return sizeof(struct xfs_icdinode);
176 return offsetof(struct xfs_icdinode, di_next_unlinked);
177}
178
155/* 179/*
156 * Flags for xfs_ichgtime(). 180 * Flags for xfs_ichgtime().
157 */ 181 */
158#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 182#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
159#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ 183#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
184#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
160 185
161/* 186/*
162 * Per-fork incore inode flags. 187 * Per-fork incore inode flags.
@@ -180,10 +205,11 @@ typedef struct xfs_icdinode {
180#define XFS_IFORK_DSIZE(ip) \ 205#define XFS_IFORK_DSIZE(ip) \
181 (XFS_IFORK_Q(ip) ? \ 206 (XFS_IFORK_Q(ip) ? \
182 XFS_IFORK_BOFF(ip) : \ 207 XFS_IFORK_BOFF(ip) : \
183 XFS_LITINO((ip)->i_mount)) 208 XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
184#define XFS_IFORK_ASIZE(ip) \ 209#define XFS_IFORK_ASIZE(ip) \
185 (XFS_IFORK_Q(ip) ? \ 210 (XFS_IFORK_Q(ip) ? \
186 XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ 211 XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
212 XFS_IFORK_BOFF(ip) : \
187 0) 213 0)
188#define XFS_IFORK_SIZE(ip,w) \ 214#define XFS_IFORK_SIZE(ip,w) \
189 ((w) == XFS_DATA_FORK ? \ 215 ((w) == XFS_DATA_FORK ? \
@@ -555,6 +581,7 @@ int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
555 struct xfs_buf **, uint, uint); 581 struct xfs_buf **, uint, uint);
556int xfs_iread(struct xfs_mount *, struct xfs_trans *, 582int xfs_iread(struct xfs_mount *, struct xfs_trans *,
557 struct xfs_inode *, uint); 583 struct xfs_inode *, uint);
584void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
558void xfs_dinode_to_disk(struct xfs_dinode *, 585void xfs_dinode_to_disk(struct xfs_dinode *,
559 struct xfs_icdinode *); 586 struct xfs_icdinode *);
560void xfs_idestroy_fork(struct xfs_inode *, int); 587void xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f034bd1652f0..f76ff52e43c0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -179,7 +179,7 @@ xfs_inode_item_format(
179 nvecs = 1; 179 nvecs = 1;
180 180
181 vecp->i_addr = &ip->i_d; 181 vecp->i_addr = &ip->i_d;
182 vecp->i_len = sizeof(struct xfs_icdinode); 182 vecp->i_len = xfs_icdinode_size(ip->i_d.di_version);
183 vecp->i_type = XLOG_REG_TYPE_ICORE; 183 vecp->i_type = XLOG_REG_TYPE_ICORE;
184 vecp++; 184 vecp++;
185 nvecs++; 185 nvecs++;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5a30dd899d2b..8f8aaee7f379 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,6 +42,8 @@
42#include "xfs_iomap.h" 42#include "xfs_iomap.h"
43#include "xfs_trace.h" 43#include "xfs_trace.h"
44#include "xfs_icache.h" 44#include "xfs_icache.h"
45#include "xfs_dquot_item.h"
46#include "xfs_dquot.h"
45 47
46 48
47#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 49#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
@@ -362,10 +364,65 @@ xfs_iomap_eof_prealloc_initial_size(
362 if (imap[0].br_startblock == HOLESTARTBLOCK) 364 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0; 365 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1)) 366 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount; 367 return imap[0].br_blockcount << 1;
366 return XFS_B_TO_FSB(mp, offset); 368 return XFS_B_TO_FSB(mp, offset);
367} 369}
368 370
371STATIC bool
372xfs_quota_need_throttle(
373 struct xfs_inode *ip,
374 int type,
375 xfs_fsblock_t alloc_blocks)
376{
377 struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
378
379 if (!dq || !xfs_this_quota_on(ip->i_mount, type))
380 return false;
381
382 /* no hi watermark, no throttle */
383 if (!dq->q_prealloc_hi_wmark)
384 return false;
385
386 /* under the lo watermark, no throttle */
387 if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
388 return false;
389
390 return true;
391}
392
393STATIC void
394xfs_quota_calc_throttle(
395 struct xfs_inode *ip,
396 int type,
397 xfs_fsblock_t *qblocks,
398 int *qshift)
399{
400 int64_t freesp;
401 int shift = 0;
402 struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
403
404 /* over hi wmark, squash the prealloc completely */
405 if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
406 *qblocks = 0;
407 return;
408 }
409
410 freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
411 if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
412 shift = 2;
413 if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
414 shift += 2;
415 if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
416 shift += 2;
417 }
418
419 /* only overwrite the throttle values if we are more aggressive */
420 if ((freesp >> shift) < (*qblocks >> *qshift)) {
421 *qblocks = freesp;
422 *qshift = shift;
423 }
424}
425
369/* 426/*
370 * If we don't have a user specified preallocation size, dynamically increase 427 * If we don't have a user specified preallocation size, dynamically increase
371 * the preallocation size as the size of the file grows. Cap the maximum size 428 * the preallocation size as the size of the file grows. Cap the maximum size
@@ -381,45 +438,89 @@ xfs_iomap_prealloc_size(
381 int nimaps) 438 int nimaps)
382{ 439{
383 xfs_fsblock_t alloc_blocks = 0; 440 xfs_fsblock_t alloc_blocks = 0;
441 int shift = 0;
442 int64_t freesp;
443 xfs_fsblock_t qblocks;
444 int qshift = 0;
384 445
385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset, 446 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps); 447 imap, nimaps);
387 if (alloc_blocks > 0) { 448 if (!alloc_blocks)
388 int shift = 0; 449 goto check_writeio;
389 int64_t freesp; 450 qblocks = alloc_blocks;
390
391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
392 rounddown_pow_of_two(alloc_blocks));
393
394 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
395 freesp = mp->m_sb.sb_fdblocks;
396 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
397 shift = 2;
398 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
399 shift++;
400 if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
401 shift++;
402 if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
403 shift++;
404 if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
405 shift++;
406 }
407 if (shift)
408 alloc_blocks >>= shift;
409 451
410 /* 452 /*
411 * If we are still trying to allocate more space than is 453 * MAXEXTLEN is not a power of two value but we round the prealloc down
412 * available, squash the prealloc hard. This can happen if we 454 * to the nearest power of two value after throttling. To prevent the
413 * have a large file on a small filesystem and the above 455 * round down from unconditionally reducing the maximum supported prealloc
414 * lowspace thresholds are smaller than MAXEXTLEN. 456 * size, we round up first, apply appropriate throttling, round down and
415 */ 457 * cap the value to MAXEXTLEN.
416 while (alloc_blocks && alloc_blocks >= freesp) 458 */
417 alloc_blocks >>= 4; 459 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
460 alloc_blocks);
461
462 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
463 freesp = mp->m_sb.sb_fdblocks;
464 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
465 shift = 2;
466 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
467 shift++;
468 if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
469 shift++;
470 if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
471 shift++;
472 if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
473 shift++;
418 } 474 }
419 475
476 /*
477 * Check each quota to cap the prealloc size and provide a shift
478 * value to throttle with.
479 */
480 if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
481 xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
482 if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
483 xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
484 if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
485 xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
486
487 /*
488 * The final prealloc size is set to the minimum of free space available
489 * in each of the quotas and the overall filesystem.
490 *
491 * The shift throttle value is set to the maximum value as determined by
492 * the global low free space values and per-quota low free space values.
493 */
494 alloc_blocks = MIN(alloc_blocks, qblocks);
495 shift = MAX(shift, qshift);
496
497 if (shift)
498 alloc_blocks >>= shift;
499 /*
500 * rounddown_pow_of_two() returns an undefined result if we pass in
501 * alloc_blocks = 0.
502 */
503 if (alloc_blocks)
504 alloc_blocks = rounddown_pow_of_two(alloc_blocks);
505 if (alloc_blocks > MAXEXTLEN)
506 alloc_blocks = MAXEXTLEN;
507
508 /*
509 * If we are still trying to allocate more space than is
510 * available, squash the prealloc hard. This can happen if we
511 * have a large file on a small filesystem and the above
512 * lowspace thresholds are smaller than MAXEXTLEN.
513 */
514 while (alloc_blocks && alloc_blocks >= freesp)
515 alloc_blocks >>= 4;
516
517check_writeio:
420 if (alloc_blocks < mp->m_writeio_blocks) 518 if (alloc_blocks < mp->m_writeio_blocks)
421 alloc_blocks = mp->m_writeio_blocks; 519 alloc_blocks = mp->m_writeio_blocks;
422 520
521 trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
522 mp->m_writeio_blocks);
523
423 return alloc_blocks; 524 return alloc_blocks;
424} 525}
425 526
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index fe7e4df85a7b..14e59d953b7b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -72,6 +72,7 @@
72#include <linux/kthread.h> 72#include <linux/kthread.h>
73#include <linux/freezer.h> 73#include <linux/freezer.h>
74#include <linux/list_sort.h> 74#include <linux/list_sort.h>
75#include <linux/ratelimit.h>
75 76
76#include <asm/page.h> 77#include <asm/page.h>
77#include <asm/div64.h> 78#include <asm/div64.h>
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index eec226f78a40..b345a7c85153 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3485,7 +3485,7 @@ xlog_ticket_alloc(
3485 tic->t_curr_res = unit_bytes; 3485 tic->t_curr_res = unit_bytes;
3486 tic->t_cnt = cnt; 3486 tic->t_cnt = cnt;
3487 tic->t_ocnt = cnt; 3487 tic->t_ocnt = cnt;
3488 tic->t_tid = random32(); 3488 tic->t_tid = prandom_u32();
3489 tic->t_clientid = client; 3489 tic->t_clientid = client;
3490 tic->t_flags = XLOG_TIC_INITED; 3490 tic->t_flags = XLOG_TIC_INITED;
3491 tic->t_trans_type = 0; 3491 tic->t_trans_type = 0;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ddc4529d07d3..e3d0b85d852b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -668,10 +668,6 @@ xlog_cil_push_foreground(
668 * transaction to the checkpoint context so we carry the busy extents through 668 * transaction to the checkpoint context so we carry the busy extents through
669 * to checkpoint completion, and then unlock all the items in the transaction. 669 * to checkpoint completion, and then unlock all the items in the transaction.
670 * 670 *
671 * For more specific information about the order of operations in
672 * xfs_log_commit_cil() please refer to the comments in
673 * xfs_trans_commit_iclog().
674 *
675 * Called with the context lock already held in read mode to lock out 671 * Called with the context lock already held in read mode to lock out
676 * background commit, returns without it held once background commits are 672 * background commit, returns without it held once background commits are
677 * allowed again. 673 * allowed again.
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 16d8d12ea3b4..b9ea262dd1c2 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -468,7 +468,6 @@ struct xfs_cil {
468 * threshold, yet give us plenty of space for aggregation on large logs. 468 * threshold, yet give us plenty of space for aggregation on large logs.
469 */ 469 */
470#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) 470#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
471#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
472 471
473/* 472/*
 474 * ticket grant locks, queues and accounting have their own cachelines 473
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d1dba7ce75ae..93f03ec17eec 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -29,6 +29,7 @@
29#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_btree.h"
32#include "xfs_dinode.h" 33#include "xfs_dinode.h"
33#include "xfs_inode.h" 34#include "xfs_inode.h"
34#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -45,6 +46,14 @@
45#include "xfs_trace.h" 46#include "xfs_trace.h"
46#include "xfs_icache.h" 47#include "xfs_icache.h"
47 48
49/* Need all the magic numbers and buffer ops structures from these headers */
50#include "xfs_symlink.h"
51#include "xfs_da_btree.h"
52#include "xfs_dir2_format.h"
53#include "xfs_dir2_priv.h"
54#include "xfs_attr_leaf.h"
55#include "xfs_attr_remote.h"
56
48STATIC int 57STATIC int
49xlog_find_zeroed( 58xlog_find_zeroed(
50 struct xlog *, 59 struct xlog *,
@@ -1785,6 +1794,7 @@ xlog_recover_do_inode_buffer(
1785 xfs_agino_t *buffer_nextp; 1794 xfs_agino_t *buffer_nextp;
1786 1795
1787 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1796 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1797 bp->b_ops = &xfs_inode_buf_ops;
1788 1798
1789 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 1799 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
1790 for (i = 0; i < inodes_per_buf; i++) { 1800 for (i = 0; i < inodes_per_buf; i++) {
@@ -1857,6 +1867,201 @@ xlog_recover_do_inode_buffer(
1857} 1867}
1858 1868
1859/* 1869/*
1870 * Validate the recovered buffer is of the correct type and attach the
1871 * appropriate buffer operations to them for writeback. Magic numbers are in a
1872 * few places:
1873 * the first 16 bits of the buffer (inode buffer, dquot buffer),
1874 * the first 32 bits of the buffer (most blocks),
1875 * inside a struct xfs_da_blkinfo at the start of the buffer.
1876 */
1877static void
1878xlog_recovery_validate_buf_type(
1879 struct xfs_mount *mp,
1880 struct xfs_buf *bp,
1881 xfs_buf_log_format_t *buf_f)
1882{
1883 struct xfs_da_blkinfo *info = bp->b_addr;
1884 __uint32_t magic32;
1885 __uint16_t magic16;
1886 __uint16_t magicda;
1887
1888 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
1889 magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
1890 magicda = be16_to_cpu(info->magic);
1891 switch (xfs_blft_from_flags(buf_f)) {
1892 case XFS_BLFT_BTREE_BUF:
1893 switch (magic32) {
1894 case XFS_ABTB_CRC_MAGIC:
1895 case XFS_ABTC_CRC_MAGIC:
1896 case XFS_ABTB_MAGIC:
1897 case XFS_ABTC_MAGIC:
1898 bp->b_ops = &xfs_allocbt_buf_ops;
1899 break;
1900 case XFS_IBT_CRC_MAGIC:
1901 case XFS_IBT_MAGIC:
1902 bp->b_ops = &xfs_inobt_buf_ops;
1903 break;
1904 case XFS_BMAP_CRC_MAGIC:
1905 case XFS_BMAP_MAGIC:
1906 bp->b_ops = &xfs_bmbt_buf_ops;
1907 break;
1908 default:
1909 xfs_warn(mp, "Bad btree block magic!");
1910 ASSERT(0);
1911 break;
1912 }
1913 break;
1914 case XFS_BLFT_AGF_BUF:
1915 if (magic32 != XFS_AGF_MAGIC) {
1916 xfs_warn(mp, "Bad AGF block magic!");
1917 ASSERT(0);
1918 break;
1919 }
1920 bp->b_ops = &xfs_agf_buf_ops;
1921 break;
1922 case XFS_BLFT_AGFL_BUF:
1923 if (!xfs_sb_version_hascrc(&mp->m_sb))
1924 break;
1925 if (magic32 != XFS_AGFL_MAGIC) {
1926 xfs_warn(mp, "Bad AGFL block magic!");
1927 ASSERT(0);
1928 break;
1929 }
1930 bp->b_ops = &xfs_agfl_buf_ops;
1931 break;
1932 case XFS_BLFT_AGI_BUF:
1933 if (magic32 != XFS_AGI_MAGIC) {
1934 xfs_warn(mp, "Bad AGI block magic!");
1935 ASSERT(0);
1936 break;
1937 }
1938 bp->b_ops = &xfs_agi_buf_ops;
1939 break;
1940 case XFS_BLFT_UDQUOT_BUF:
1941 case XFS_BLFT_PDQUOT_BUF:
1942 case XFS_BLFT_GDQUOT_BUF:
1943#ifdef CONFIG_XFS_QUOTA
1944 if (magic16 != XFS_DQUOT_MAGIC) {
1945 xfs_warn(mp, "Bad DQUOT block magic!");
1946 ASSERT(0);
1947 break;
1948 }
1949 bp->b_ops = &xfs_dquot_buf_ops;
1950#else
1951 xfs_alert(mp,
1952 "Trying to recover dquots without QUOTA support built in!");
1953 ASSERT(0);
1954#endif
1955 break;
1956 case XFS_BLFT_DINO_BUF:
1957 /*
1958 * we get here with inode allocation buffers, not buffers that
1959 * track unlinked list changes.
1960 */
1961 if (magic16 != XFS_DINODE_MAGIC) {
1962 xfs_warn(mp, "Bad INODE block magic!");
1963 ASSERT(0);
1964 break;
1965 }
1966 bp->b_ops = &xfs_inode_buf_ops;
1967 break;
1968 case XFS_BLFT_SYMLINK_BUF:
1969 if (magic32 != XFS_SYMLINK_MAGIC) {
1970 xfs_warn(mp, "Bad symlink block magic!");
1971 ASSERT(0);
1972 break;
1973 }
1974 bp->b_ops = &xfs_symlink_buf_ops;
1975 break;
1976 case XFS_BLFT_DIR_BLOCK_BUF:
1977 if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
1978 magic32 != XFS_DIR3_BLOCK_MAGIC) {
1979 xfs_warn(mp, "Bad dir block magic!");
1980 ASSERT(0);
1981 break;
1982 }
1983 bp->b_ops = &xfs_dir3_block_buf_ops;
1984 break;
1985 case XFS_BLFT_DIR_DATA_BUF:
1986 if (magic32 != XFS_DIR2_DATA_MAGIC &&
1987 magic32 != XFS_DIR3_DATA_MAGIC) {
1988 xfs_warn(mp, "Bad dir data magic!");
1989 ASSERT(0);
1990 break;
1991 }
1992 bp->b_ops = &xfs_dir3_data_buf_ops;
1993 break;
1994 case XFS_BLFT_DIR_FREE_BUF:
1995 if (magic32 != XFS_DIR2_FREE_MAGIC &&
1996 magic32 != XFS_DIR3_FREE_MAGIC) {
1997 xfs_warn(mp, "Bad dir3 free magic!");
1998 ASSERT(0);
1999 break;
2000 }
2001 bp->b_ops = &xfs_dir3_free_buf_ops;
2002 break;
2003 case XFS_BLFT_DIR_LEAF1_BUF:
2004 if (magicda != XFS_DIR2_LEAF1_MAGIC &&
2005 magicda != XFS_DIR3_LEAF1_MAGIC) {
2006 xfs_warn(mp, "Bad dir leaf1 magic!");
2007 ASSERT(0);
2008 break;
2009 }
2010 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
2011 break;
2012 case XFS_BLFT_DIR_LEAFN_BUF:
2013 if (magicda != XFS_DIR2_LEAFN_MAGIC &&
2014 magicda != XFS_DIR3_LEAFN_MAGIC) {
2015 xfs_warn(mp, "Bad dir leafn magic!");
2016 ASSERT(0);
2017 break;
2018 }
2019 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2020 break;
2021 case XFS_BLFT_DA_NODE_BUF:
2022 if (magicda != XFS_DA_NODE_MAGIC &&
2023 magicda != XFS_DA3_NODE_MAGIC) {
2024 xfs_warn(mp, "Bad da node magic!");
2025 ASSERT(0);
2026 break;
2027 }
2028 bp->b_ops = &xfs_da3_node_buf_ops;
2029 break;
2030 case XFS_BLFT_ATTR_LEAF_BUF:
2031 if (magicda != XFS_ATTR_LEAF_MAGIC &&
2032 magicda != XFS_ATTR3_LEAF_MAGIC) {
2033 xfs_warn(mp, "Bad attr leaf magic!");
2034 ASSERT(0);
2035 break;
2036 }
2037 bp->b_ops = &xfs_attr3_leaf_buf_ops;
2038 break;
2039 case XFS_BLFT_ATTR_RMT_BUF:
2040 if (!xfs_sb_version_hascrc(&mp->m_sb))
2041 break;
2042 if (magic32 != XFS_ATTR3_RMT_MAGIC) {
2043 xfs_warn(mp, "Bad attr remote magic!");
2044 ASSERT(0);
2045 break;
2046 }
2047 bp->b_ops = &xfs_attr3_rmt_buf_ops;
2048 break;
2049 case XFS_BLFT_SB_BUF:
2050 if (magic32 != XFS_SB_MAGIC) {
2051 xfs_warn(mp, "Bad SB block magic!");
2052 ASSERT(0);
2053 break;
2054 }
2055 bp->b_ops = &xfs_sb_buf_ops;
2056 break;
2057 default:
2058 xfs_warn(mp, "Unknown buffer type %d!",
2059 xfs_blft_from_flags(buf_f));
2060 break;
2061 }
2062}
2063
2064/*
1860 * Perform a 'normal' buffer recovery. Each logged region of the 2065 * Perform a 'normal' buffer recovery. Each logged region of the
1861 * buffer should be copied over the corresponding region in the 2066 * buffer should be copied over the corresponding region in the
1862 * given buffer. The bitmap in the buf log format structure indicates 2067 * given buffer. The bitmap in the buf log format structure indicates
@@ -1928,6 +2133,8 @@ xlog_recover_do_reg_buffer(
1928 2133
1929 /* Shouldn't be any more regions */ 2134 /* Shouldn't be any more regions */
1930 ASSERT(i == item->ri_total); 2135 ASSERT(i == item->ri_total);
2136
2137 xlog_recovery_validate_buf_type(mp, bp, buf_f);
1931} 2138}
1932 2139
1933/* 2140/*
@@ -2213,6 +2420,7 @@ xlog_recover_inode_pass2(
2213 int attr_index; 2420 int attr_index;
2214 uint fields; 2421 uint fields;
2215 xfs_icdinode_t *dicp; 2422 xfs_icdinode_t *dicp;
2423 uint isize;
2216 int need_free = 0; 2424 int need_free = 0;
2217 2425
2218 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2426 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
@@ -2238,7 +2446,7 @@ xlog_recover_inode_pass2(
2238 trace_xfs_log_recover_inode_recover(log, in_f); 2446 trace_xfs_log_recover_inode_recover(log, in_f);
2239 2447
2240 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, 2448 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
2241 NULL); 2449 &xfs_inode_buf_ops);
2242 if (!bp) { 2450 if (!bp) {
2243 error = ENOMEM; 2451 error = ENOMEM;
2244 goto error; 2452 goto error;
@@ -2349,7 +2557,8 @@ xlog_recover_inode_pass2(
2349 error = EFSCORRUPTED; 2557 error = EFSCORRUPTED;
2350 goto error; 2558 goto error;
2351 } 2559 }
2352 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { 2560 isize = xfs_icdinode_size(dicp->di_version);
2561 if (unlikely(item->ri_buf[1].i_len > isize)) {
2353 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 2562 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2354 XFS_ERRLEVEL_LOW, mp, dicp); 2563 XFS_ERRLEVEL_LOW, mp, dicp);
2355 xfs_buf_relse(bp); 2564 xfs_buf_relse(bp);
@@ -2361,13 +2570,13 @@ xlog_recover_inode_pass2(
2361 } 2570 }
2362 2571
2363 /* The core is in in-core format */ 2572 /* The core is in in-core format */
2364 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); 2573 xfs_dinode_to_disk(dip, dicp);
2365 2574
2366 /* the rest is in on-disk format */ 2575 /* the rest is in on-disk format */
2367 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2576 if (item->ri_buf[1].i_len > isize) {
2368 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), 2577 memcpy((char *)dip + isize,
2369 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), 2578 item->ri_buf[1].i_addr + isize,
2370 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); 2579 item->ri_buf[1].i_len - isize);
2371 } 2580 }
2372 2581
2373 fields = in_f->ilf_fields; 2582 fields = in_f->ilf_fields;
@@ -2451,6 +2660,9 @@ xlog_recover_inode_pass2(
2451 } 2660 }
2452 2661
2453write_inode_buffer: 2662write_inode_buffer:
2663 /* re-generate the checksum. */
2664 xfs_dinode_calc_crc(log->l_mp, dip);
2665
2454 ASSERT(bp->b_target->bt_mount == mp); 2666 ASSERT(bp->b_target->bt_mount == mp);
2455 bp->b_iodone = xlog_recover_iodone; 2667 bp->b_iodone = xlog_recover_iodone;
2456 xfs_buf_delwri_queue(bp, buffer_list); 2668 xfs_buf_delwri_queue(bp, buffer_list);
@@ -2948,6 +3160,7 @@ xlog_recover_process_efi(
2948 * This will pull the EFI from the AIL and 3160 * This will pull the EFI from the AIL and
2949 * free the memory associated with it. 3161 * free the memory associated with it.
2950 */ 3162 */
3163 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
2951 xfs_efi_release(efip, efip->efi_format.efi_nextents); 3164 xfs_efi_release(efip, efip->efi_format.efi_nextents);
2952 return XFS_ERROR(EIO); 3165 return XFS_ERROR(EIO);
2953 } 3166 }
@@ -3751,6 +3964,25 @@ xlog_recover(
3751 return error; 3964 return error;
3752 } 3965 }
3753 3966
3967 /*
3968 * Version 5 superblock log feature mask validation. We know the
3969 * log is dirty so check if there are any unknown log features
3970 * in what we need to recover. If there are unknown features
3971 * (e.g. unsupported transactions, then simply reject the
3972 * attempt at recovery before touching anything.
3973 */
3974 if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 &&
3975 xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
3976 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
3977 xfs_warn(log->l_mp,
3978"Superblock has unknown incompatible log features (0x%x) enabled.\n"
3979"The log can not be fully and/or safely recovered by this kernel.\n"
3980"Please recover the log on a kernel that supports the unknown features.",
3981 (log->l_mp->m_sb.sb_features_log_incompat &
3982 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
3983 return EINVAL;
3984 }
3985
3754 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 3986 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
3755 log->l_mp->m_logname ? log->l_mp->m_logname 3987 log->l_mp->m_logname ? log->l_mp->m_logname
3756 : "internal"); 3988 : "internal");
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 56dc0c17f16a..76c81982f964 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -30,6 +30,32 @@ void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
30} 30}
31#endif 31#endif
32 32
33#define xfs_printk_ratelimited(func, dev, fmt, ...) \
34do { \
35 static DEFINE_RATELIMIT_STATE(_rs, \
36 DEFAULT_RATELIMIT_INTERVAL, \
37 DEFAULT_RATELIMIT_BURST); \
38 if (__ratelimit(&_rs)) \
39 func(dev, fmt, ##__VA_ARGS__); \
40} while (0)
41
42#define xfs_emerg_ratelimited(dev, fmt, ...) \
43 xfs_printk_ratelimited(xfs_emerg, dev, fmt, ##__VA_ARGS__)
44#define xfs_alert_ratelimited(dev, fmt, ...) \
45 xfs_printk_ratelimited(xfs_alert, dev, fmt, ##__VA_ARGS__)
46#define xfs_crit_ratelimited(dev, fmt, ...) \
47 xfs_printk_ratelimited(xfs_crit, dev, fmt, ##__VA_ARGS__)
48#define xfs_err_ratelimited(dev, fmt, ...) \
49 xfs_printk_ratelimited(xfs_err, dev, fmt, ##__VA_ARGS__)
50#define xfs_warn_ratelimited(dev, fmt, ...) \
51 xfs_printk_ratelimited(xfs_warn, dev, fmt, ##__VA_ARGS__)
52#define xfs_notice_ratelimited(dev, fmt, ...) \
53 xfs_printk_ratelimited(xfs_notice, dev, fmt, ##__VA_ARGS__)
54#define xfs_info_ratelimited(dev, fmt, ...) \
55 xfs_printk_ratelimited(xfs_info, dev, fmt, ##__VA_ARGS__)
56#define xfs_debug_ratelimited(dev, fmt, ...) \
57 xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
58
33extern void assfail(char *expr, char *f, int l); 59extern void assfail(char *expr, char *f, int l);
34 60
35extern void xfs_hex_dump(void *p, int length); 61extern void xfs_hex_dump(void *p, int length);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3806088a8f77..f6bfbd734669 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,6 +43,8 @@
43#include "xfs_utils.h" 43#include "xfs_utils.h"
44#include "xfs_trace.h" 44#include "xfs_trace.h"
45#include "xfs_icache.h" 45#include "xfs_icache.h"
46#include "xfs_cksum.h"
47#include "xfs_buf_item.h"
46 48
47 49
48#ifdef HAVE_PERCPU_SB 50#ifdef HAVE_PERCPU_SB
@@ -109,6 +111,14 @@ static const struct {
109 { offsetof(xfs_sb_t, sb_logsunit), 0 }, 111 { offsetof(xfs_sb_t, sb_logsunit), 0 },
110 { offsetof(xfs_sb_t, sb_features2), 0 }, 112 { offsetof(xfs_sb_t, sb_features2), 0 },
111 { offsetof(xfs_sb_t, sb_bad_features2), 0 }, 113 { offsetof(xfs_sb_t, sb_bad_features2), 0 },
114 { offsetof(xfs_sb_t, sb_features_compat), 0 },
115 { offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
116 { offsetof(xfs_sb_t, sb_features_incompat), 0 },
117 { offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
118 { offsetof(xfs_sb_t, sb_crc), 0 },
119 { offsetof(xfs_sb_t, sb_pad), 0 },
120 { offsetof(xfs_sb_t, sb_pquotino), 0 },
121 { offsetof(xfs_sb_t, sb_lsn), 0 },
112 { sizeof(xfs_sb_t), 0 } 122 { sizeof(xfs_sb_t), 0 }
113}; 123};
114 124
@@ -319,11 +329,54 @@ xfs_mount_validate_sb(
319 return XFS_ERROR(EWRONGFS); 329 return XFS_ERROR(EWRONGFS);
320 } 330 }
321 331
332
322 if (!xfs_sb_good_version(sbp)) { 333 if (!xfs_sb_good_version(sbp)) {
323 xfs_warn(mp, "bad version"); 334 xfs_warn(mp, "bad version");
324 return XFS_ERROR(EWRONGFS); 335 return XFS_ERROR(EWRONGFS);
325 } 336 }
326 337
338 /*
339 * Version 5 superblock feature mask validation. Reject combinations the
340 * kernel cannot support up front before checking anything else.
341 */
342 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
343 xfs_alert(mp,
344"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
345"Use of these features in this kernel is at your own risk!");
346
347 if (xfs_sb_has_compat_feature(sbp,
348 XFS_SB_FEAT_COMPAT_UNKNOWN)) {
349 xfs_warn(mp,
350"Superblock has unknown compatible features (0x%x) enabled.\n"
351"Using a more recent kernel is recommended.",
352 (sbp->sb_features_compat &
353 XFS_SB_FEAT_COMPAT_UNKNOWN));
354 }
355
356 if (xfs_sb_has_ro_compat_feature(sbp,
357 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
358 xfs_alert(mp,
359"Superblock has unknown read-only compatible features (0x%x) enabled.",
360 (sbp->sb_features_ro_compat &
361 XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
362 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
363 xfs_warn(mp,
364"Attempted to mount read-only compatible filesystem read-write.\n"
365"Filesystem can only be safely mounted read only.");
366 return XFS_ERROR(EINVAL);
367 }
368 }
369 if (xfs_sb_has_incompat_feature(sbp,
370 XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
371 xfs_warn(mp,
372"Superblock has unknown incompatible features (0x%x) enabled.\n"
373"Filesystem can not be safely mounted by this kernel.",
374 (sbp->sb_features_incompat &
375 XFS_SB_FEAT_INCOMPAT_UNKNOWN));
376 return XFS_ERROR(EINVAL);
377 }
378 }
379
327 if (unlikely( 380 if (unlikely(
328 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 381 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
329 xfs_warn(mp, 382 xfs_warn(mp,
@@ -557,6 +610,14 @@ xfs_sb_from_disk(
557 to->sb_logsunit = be32_to_cpu(from->sb_logsunit); 610 to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
558 to->sb_features2 = be32_to_cpu(from->sb_features2); 611 to->sb_features2 = be32_to_cpu(from->sb_features2);
559 to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); 612 to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
613 to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
614 to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
615 to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
616 to->sb_features_log_incompat =
617 be32_to_cpu(from->sb_features_log_incompat);
618 to->sb_pad = 0;
619 to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
620 to->sb_lsn = be64_to_cpu(from->sb_lsn);
560} 621}
561 622
562/* 623/*
@@ -612,13 +673,12 @@ xfs_sb_to_disk(
612 } 673 }
613} 674}
614 675
615static void 676static int
616xfs_sb_verify( 677xfs_sb_verify(
617 struct xfs_buf *bp) 678 struct xfs_buf *bp)
618{ 679{
619 struct xfs_mount *mp = bp->b_target->bt_mount; 680 struct xfs_mount *mp = bp->b_target->bt_mount;
620 struct xfs_sb sb; 681 struct xfs_sb sb;
621 int error;
622 682
623 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); 683 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
624 684
@@ -626,16 +686,46 @@ xfs_sb_verify(
626 * Only check the in progress field for the primary superblock as 686 * Only check the in progress field for the primary superblock as
627 * mkfs.xfs doesn't clear it from secondary superblocks. 687 * mkfs.xfs doesn't clear it from secondary superblocks.
628 */ 688 */
629 error = xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR); 689 return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR);
630 if (error)
631 xfs_buf_ioerror(bp, error);
632} 690}
633 691
692/*
693 * If the superblock has the CRC feature bit set or the CRC field is non-null,
694 * check that the CRC is valid. We check the CRC field is non-null because a
695 * single bit error could clear the feature bit and unused parts of the
696 * superblock are supposed to be zero. Hence a non-null crc field indicates that
697 * we've potentially lost a feature bit and we should check it anyway.
698 */
634static void 699static void
635xfs_sb_read_verify( 700xfs_sb_read_verify(
636 struct xfs_buf *bp) 701 struct xfs_buf *bp)
637{ 702{
638 xfs_sb_verify(bp); 703 struct xfs_mount *mp = bp->b_target->bt_mount;
704 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
705 int error;
706
707 /*
708 * open code the version check to avoid needing to convert the entire
709 * superblock from disk order just to check the version number
710 */
711 if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
712 (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
713 XFS_SB_VERSION_5) ||
714 dsb->sb_crc != 0)) {
715
716 if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
717 offsetof(struct xfs_sb, sb_crc))) {
718 error = EFSCORRUPTED;
719 goto out_error;
720 }
721 }
722 error = xfs_sb_verify(bp);
723
724out_error:
725 if (error) {
726 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
727 xfs_buf_ioerror(bp, error);
728 }
639} 729}
640 730
641/* 731/*
@@ -648,11 +738,10 @@ static void
648xfs_sb_quiet_read_verify( 738xfs_sb_quiet_read_verify(
649 struct xfs_buf *bp) 739 struct xfs_buf *bp)
650{ 740{
651 struct xfs_sb sb; 741 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
652 742
653 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
654 743
655 if (sb.sb_magicnum == XFS_SB_MAGIC) { 744 if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
656 /* XFS filesystem, verify noisily! */ 745 /* XFS filesystem, verify noisily! */
657 xfs_sb_read_verify(bp); 746 xfs_sb_read_verify(bp);
658 return; 747 return;
@@ -663,9 +752,27 @@ xfs_sb_quiet_read_verify(
663 752
664static void 753static void
665xfs_sb_write_verify( 754xfs_sb_write_verify(
666 struct xfs_buf *bp) 755 struct xfs_buf *bp)
667{ 756{
668 xfs_sb_verify(bp); 757 struct xfs_mount *mp = bp->b_target->bt_mount;
758 struct xfs_buf_log_item *bip = bp->b_fspriv;
759 int error;
760
761 error = xfs_sb_verify(bp);
762 if (error) {
763 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
764 xfs_buf_ioerror(bp, error);
765 return;
766 }
767
768 if (!xfs_sb_version_hascrc(&mp->m_sb))
769 return;
770
771 if (bip)
772 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
773
774 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
775 offsetof(struct xfs_sb, sb_crc));
669} 776}
670 777
671const struct xfs_buf_ops xfs_sb_buf_ops = { 778const struct xfs_buf_ops xfs_sb_buf_ops = {
@@ -687,7 +794,8 @@ int
687xfs_readsb(xfs_mount_t *mp, int flags) 794xfs_readsb(xfs_mount_t *mp, int flags)
688{ 795{
689 unsigned int sector_size; 796 unsigned int sector_size;
690 xfs_buf_t *bp; 797 struct xfs_buf *bp;
798 struct xfs_sb *sbp = &mp->m_sb;
691 int error; 799 int error;
692 int loud = !(flags & XFS_MFSI_QUIET); 800 int loud = !(flags & XFS_MFSI_QUIET);
693 801
@@ -714,7 +822,7 @@ reread:
714 if (bp->b_error) { 822 if (bp->b_error) {
715 error = bp->b_error; 823 error = bp->b_error;
716 if (loud) 824 if (loud)
717 xfs_warn(mp, "SB validate failed"); 825 xfs_warn(mp, "SB validate failed with error %d.", error);
718 goto release_buf; 826 goto release_buf;
719 } 827 }
720 828
@@ -726,10 +834,10 @@ reread:
726 /* 834 /*
727 * We must be able to do sector-sized and sector-aligned IO. 835 * We must be able to do sector-sized and sector-aligned IO.
728 */ 836 */
729 if (sector_size > mp->m_sb.sb_sectsize) { 837 if (sector_size > sbp->sb_sectsize) {
730 if (loud) 838 if (loud)
731 xfs_warn(mp, "device supports %u byte sectors (not %u)", 839 xfs_warn(mp, "device supports %u byte sectors (not %u)",
732 sector_size, mp->m_sb.sb_sectsize); 840 sector_size, sbp->sb_sectsize);
733 error = ENOSYS; 841 error = ENOSYS;
734 goto release_buf; 842 goto release_buf;
735 } 843 }
@@ -738,15 +846,18 @@ reread:
738 * If device sector size is smaller than the superblock size, 846 * If device sector size is smaller than the superblock size,
739 * re-read the superblock so the buffer is correctly sized. 847 * re-read the superblock so the buffer is correctly sized.
740 */ 848 */
741 if (sector_size < mp->m_sb.sb_sectsize) { 849 if (sector_size < sbp->sb_sectsize) {
742 xfs_buf_relse(bp); 850 xfs_buf_relse(bp);
743 sector_size = mp->m_sb.sb_sectsize; 851 sector_size = sbp->sb_sectsize;
744 goto reread; 852 goto reread;
745 } 853 }
746 854
747 /* Initialize per-cpu counters */ 855 /* Initialize per-cpu counters */
748 xfs_icsb_reinit_counters(mp); 856 xfs_icsb_reinit_counters(mp);
749 857
858 /* no need to be quiet anymore, so reset the buf ops */
859 bp->b_ops = &xfs_sb_buf_ops;
860
750 mp->m_sb_bp = bp; 861 mp->m_sb_bp = bp;
751 xfs_buf_unlock(bp); 862 xfs_buf_unlock(bp);
752 return 0; 863 return 0;
@@ -1633,6 +1744,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1633 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1744 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1634 first = xfs_sb_info[f].offset; 1745 first = xfs_sb_info[f].offset;
1635 1746
1747 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
1636 xfs_trans_log_buf(tp, bp, first, last); 1748 xfs_trans_log_buf(tp, bp, first, last);
1637} 1749}
1638 1750
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bc907061d392..b004cecdfb04 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -207,7 +207,6 @@ typedef struct xfs_mount {
207 trimming */ 207 trimming */
208 __int64_t m_update_flags; /* sb flags we need to update 208 __int64_t m_update_flags; /* sb flags we need to update
209 on the next remount,rw */ 209 on the next remount,rw */
210 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
211 int64_t m_low_space[XFS_LOWSP_MAX]; 210 int64_t m_low_space[XFS_LOWSP_MAX];
212 /* low free space thresholds */ 211 /* low free space thresholds */
213 212
@@ -392,6 +391,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
392 391
393#endif /* __KERNEL__ */ 392#endif /* __KERNEL__ */
394 393
394extern void xfs_sb_calc_crc(struct xfs_buf *);
395extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 395extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, 396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
397 xfs_agnumber_t *); 397 xfs_agnumber_t *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index e5b5cf973781..f41702b43003 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -617,6 +617,20 @@ xfs_qm_dqdetach(
617 } 617 }
618} 618}
619 619
620int
621xfs_qm_calc_dquots_per_chunk(
622 struct xfs_mount *mp,
623 unsigned int nbblks) /* basic block units */
624{
625 unsigned int ndquots;
626
627 ASSERT(nbblks > 0);
628 ndquots = BBTOB(nbblks);
629 do_div(ndquots, sizeof(xfs_dqblk_t));
630
631 return ndquots;
632}
633
620/* 634/*
621 * This initializes all the quota information that's kept in the 635 * This initializes all the quota information that's kept in the
622 * mount structure 636 * mount structure
@@ -656,9 +670,8 @@ xfs_qm_init_quotainfo(
656 670
657 /* Precalc some constants */ 671 /* Precalc some constants */
658 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 672 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
659 ASSERT(qinf->qi_dqchunklen); 673 qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp,
660 qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); 674 qinf->qi_dqchunklen);
661 do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
662 675
663 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); 676 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
664 677
@@ -897,6 +910,10 @@ xfs_qm_dqiter_bufs(
897 if (error) 910 if (error)
898 break; 911 break;
899 912
913 /*
914 * XXX(hch): need to figure out if it makes sense to validate
915 * the CRC here.
916 */
900 xfs_qm_reset_dqcounts(mp, bp, firstid, type); 917 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
901 xfs_buf_delwri_queue(bp, buffer_list); 918 xfs_buf_delwri_queue(bp, buffer_list);
902 xfs_buf_relse(bp); 919 xfs_buf_relse(bp);
@@ -1057,7 +1074,7 @@ xfs_qm_quotacheck_dqadjust(
1057 * There are no timers for the default values set in the root dquot. 1074 * There are no timers for the default values set in the root dquot.
1058 */ 1075 */
1059 if (dqp->q_core.d_id) { 1076 if (dqp->q_core.d_id) {
1060 xfs_qm_adjust_dqlimits(mp, &dqp->q_core); 1077 xfs_qm_adjust_dqlimits(mp, dqp);
1061 xfs_qm_adjust_dqtimers(mp, &dqp->q_core); 1078 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1062 } 1079 }
1063 1080
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 44b858b79d71..5d16a6e6900f 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -75,6 +75,8 @@ typedef struct xfs_quotainfo {
75 &((qi)->qi_gquota_tree)) 75 &((qi)->qi_gquota_tree))
76 76
77 77
78extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp,
79 unsigned int nbblks);
78extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 80extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
79extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 81extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
80 xfs_dquot_t *, xfs_dquot_t *, long, long, uint); 82 xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -116,7 +118,7 @@ extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
116extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); 118extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
117extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, 119extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
118 fs_disk_quota_t *); 120 fs_disk_quota_t *);
119extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, 121extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
120 fs_disk_quota_t *); 122 fs_disk_quota_t *);
121extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); 123extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
122extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 124extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index cf9a34051e07..c41190cad6e9 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -472,15 +472,15 @@ xfs_qm_scall_getqstat(
472 */ 472 */
473int 473int
474xfs_qm_scall_setqlim( 474xfs_qm_scall_setqlim(
475 xfs_mount_t *mp, 475 struct xfs_mount *mp,
476 xfs_dqid_t id, 476 xfs_dqid_t id,
477 uint type, 477 uint type,
478 fs_disk_quota_t *newlim) 478 fs_disk_quota_t *newlim)
479{ 479{
480 struct xfs_quotainfo *q = mp->m_quotainfo; 480 struct xfs_quotainfo *q = mp->m_quotainfo;
481 xfs_disk_dquot_t *ddq; 481 struct xfs_disk_dquot *ddq;
482 xfs_dquot_t *dqp; 482 struct xfs_dquot *dqp;
483 xfs_trans_t *tp; 483 struct xfs_trans *tp;
484 int error; 484 int error;
485 xfs_qcnt_t hard, soft; 485 xfs_qcnt_t hard, soft;
486 486
@@ -529,6 +529,7 @@ xfs_qm_scall_setqlim(
529 if (hard == 0 || hard >= soft) { 529 if (hard == 0 || hard >= soft) {
530 ddq->d_blk_hardlimit = cpu_to_be64(hard); 530 ddq->d_blk_hardlimit = cpu_to_be64(hard);
531 ddq->d_blk_softlimit = cpu_to_be64(soft); 531 ddq->d_blk_softlimit = cpu_to_be64(soft);
532 xfs_dquot_set_prealloc_limits(dqp);
532 if (id == 0) { 533 if (id == 0) {
533 q->qi_bhardlimit = hard; 534 q->qi_bhardlimit = hard;
534 q->qi_bsoftlimit = soft; 535 q->qi_bsoftlimit = soft;
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index b50ec5b95d5a..c61e31c7d997 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -77,7 +77,14 @@ typedef struct xfs_disk_dquot {
77 */ 77 */
78typedef struct xfs_dqblk { 78typedef struct xfs_dqblk {
79 xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ 79 xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */
80 char dd_fill[32]; /* filling for posterity */ 80 char dd_fill[4]; /* filling for posterity */
81
82 /*
83 * These two are only present on filesystems with the CRC bits set.
84 */
85 __be32 dd_crc; /* checksum */
86 __be64 dd_lsn; /* last modification in log */
87 uuid_t dd_uuid; /* location information */
81} xfs_dqblk_t; 88} xfs_dqblk_t;
82 89
83/* 90/*
@@ -380,5 +387,7 @@ extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
380 xfs_dqid_t, uint, uint, char *); 387 xfs_dqid_t, uint, uint, char *);
381extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 388extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
382 389
390extern const struct xfs_buf_ops xfs_dquot_buf_ops;
391
383#endif /* __KERNEL__ */ 392#endif /* __KERNEL__ */
384#endif /* __XFS_QUOTA_H__ */ 393#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index a05b45175fb0..2de58a85833c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -32,6 +32,7 @@ struct xfs_mount;
32#define XFS_SB_VERSION_2 2 /* 6.2 - attributes */ 32#define XFS_SB_VERSION_2 2 /* 6.2 - attributes */
33#define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */ 33#define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */
34#define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */ 34#define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */
35#define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */
35#define XFS_SB_VERSION_NUMBITS 0x000f 36#define XFS_SB_VERSION_NUMBITS 0x000f
36#define XFS_SB_VERSION_ALLFBITS 0xfff0 37#define XFS_SB_VERSION_ALLFBITS 0xfff0
37#define XFS_SB_VERSION_SASHFBITS 0xf000 38#define XFS_SB_VERSION_SASHFBITS 0xf000
@@ -161,6 +162,20 @@ typedef struct xfs_sb {
161 */ 162 */
162 __uint32_t sb_bad_features2; 163 __uint32_t sb_bad_features2;
163 164
165 /* version 5 superblock fields start here */
166
167 /* feature masks */
168 __uint32_t sb_features_compat;
169 __uint32_t sb_features_ro_compat;
170 __uint32_t sb_features_incompat;
171 __uint32_t sb_features_log_incompat;
172
173 __uint32_t sb_crc; /* superblock crc */
174 __uint32_t sb_pad;
175
176 xfs_ino_t sb_pquotino; /* project quota inode */
177 xfs_lsn_t sb_lsn; /* last write sequence */
178
164 /* must be padded to 64 bit alignment */ 179 /* must be padded to 64 bit alignment */
165} xfs_sb_t; 180} xfs_sb_t;
166 181
@@ -229,7 +244,21 @@ typedef struct xfs_dsb {
229 * for features2 bits. Easiest just to mark it bad and not use 244 * for features2 bits. Easiest just to mark it bad and not use
230 * it for anything else. 245 * it for anything else.
231 */ 246 */
232 __be32 sb_bad_features2; 247 __be32 sb_bad_features2;
248
249 /* version 5 superblock fields start here */
250
251 /* feature masks */
252 __be32 sb_features_compat;
253 __be32 sb_features_ro_compat;
254 __be32 sb_features_incompat;
255 __be32 sb_features_log_incompat;
256
257 __le32 sb_crc; /* superblock crc */
258 __be32 sb_pad;
259
260 __be64 sb_pquotino; /* project quota inode */
261 __be64 sb_lsn; /* last write sequence */
233 262
234 /* must be padded to 64 bit alignment */ 263 /* must be padded to 64 bit alignment */
235} xfs_dsb_t; 264} xfs_dsb_t;
@@ -250,7 +279,10 @@ typedef enum {
250 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, 279 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
251 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, 280 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
252 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, 281 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
253 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, 282 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
283 XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
284 XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
285 XFS_SBS_PQUOTINO, XFS_SBS_LSN,
254 XFS_SBS_FIELDCOUNT 286 XFS_SBS_FIELDCOUNT
255} xfs_sb_field_t; 287} xfs_sb_field_t;
256 288
@@ -276,6 +308,12 @@ typedef enum {
276#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) 308#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
277#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 309#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
278#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) 310#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2)
311#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
312#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
313#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
314#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
315#define XFS_SB_CRC XFS_SB_MVAL(CRC)
316#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
279#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) 317#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
280#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) 318#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
281#define XFS_SB_MOD_BITS \ 319#define XFS_SB_MOD_BITS \
@@ -283,7 +321,9 @@ typedef enum {
283 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 321 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
284 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 322 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
285 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ 323 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
286 XFS_SB_BAD_FEATURES2) 324 XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \
325 XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \
326 XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO)
287 327
288 328
289/* 329/*
@@ -325,6 +365,8 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
325 365
326 return 1; 366 return 1;
327 } 367 }
368 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
369 return 1;
328 370
329 return 0; 371 return 0;
330} 372}
@@ -365,7 +407,7 @@ static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
365{ 407{
366 return sbp->sb_versionnum == XFS_SB_VERSION_2 || 408 return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
367 sbp->sb_versionnum == XFS_SB_VERSION_3 || 409 sbp->sb_versionnum == XFS_SB_VERSION_3 ||
368 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 410 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
369 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); 411 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
370} 412}
371 413
@@ -373,7 +415,7 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
373{ 415{
374 if (sbp->sb_versionnum == XFS_SB_VERSION_1) 416 if (sbp->sb_versionnum == XFS_SB_VERSION_1)
375 sbp->sb_versionnum = XFS_SB_VERSION_2; 417 sbp->sb_versionnum = XFS_SB_VERSION_2;
376 else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) 418 else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
377 sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; 419 sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
378 else 420 else
379 sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; 421 sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
@@ -382,7 +424,7 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
382static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) 424static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
383{ 425{
384 return sbp->sb_versionnum == XFS_SB_VERSION_3 || 426 return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
385 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 427 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
386 (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); 428 (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
387} 429}
388 430
@@ -396,13 +438,13 @@ static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
396 438
397static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) 439static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
398{ 440{
399 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 441 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
400 (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); 442 (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
401} 443}
402 444
403static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) 445static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
404{ 446{
405 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) 447 if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
406 sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; 448 sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
407 else 449 else
408 sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | 450 sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
@@ -411,13 +453,14 @@ static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
411 453
412static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) 454static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
413{ 455{
414 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 456 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
415 (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); 457 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
458 (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
416} 459}
417 460
418static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) 461static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
419{ 462{
420 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 463 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
421 (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); 464 (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
422} 465}
423 466
@@ -429,38 +472,42 @@ static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
429 472
430static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) 473static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
431{ 474{
432 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 475 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
433 (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 476 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
477 (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT));
434} 478}
435 479
436static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) 480static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
437{ 481{
438 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 482 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
439 (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); 483 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
484 (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT));
440} 485}
441 486
442static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) 487static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
443{ 488{
444 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 489 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
445 (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); 490 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
491 (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT));
446} 492}
447 493
448static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) 494static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
449{ 495{
450 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 496 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
451 (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 497 (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
452} 498}
453 499
454static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) 500static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
455{ 501{
456 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 502 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
457 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); 503 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
458} 504}
459 505
460static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 506static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
461{ 507{
462 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 508 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
463 (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); 509 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
510 (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT));
464} 511}
465 512
466/* 513/*
@@ -475,14 +522,16 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
475 522
476static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) 523static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
477{ 524{
478 return xfs_sb_version_hasmorebits(sbp) && 525 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
479 (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT); 526 (xfs_sb_version_hasmorebits(sbp) &&
527 (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
480} 528}
481 529
482static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 530static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
483{ 531{
484 return xfs_sb_version_hasmorebits(sbp) && 532 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
485 (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); 533 (xfs_sb_version_hasmorebits(sbp) &&
534 (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
486} 535}
487 536
488static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) 537static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
@@ -500,14 +549,73 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
500 549
501static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) 550static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
502{ 551{
503 return xfs_sb_version_hasmorebits(sbp) && 552 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
504 (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); 553 (xfs_sb_version_hasmorebits(sbp) &&
554 (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
505} 555}
506 556
507static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) 557static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
508{ 558{
509 return (xfs_sb_version_hasmorebits(sbp) && 559 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
510 (sbp->sb_features2 & XFS_SB_VERSION2_CRCBIT)); 560}
561
562
563/*
564 * Extended v5 superblock feature masks. These are to be used for new v5
565 * superblock features only.
566 *
567 * Compat features are new features that old kernels will not notice or affect
568 * and so can mount read-write without issues.
569 *
570 * RO-Compat (read only) are features that old kernels can read but will break
571 * if they write. Hence only read-only mounts of such filesystems are allowed on
572 * kernels that don't support the feature bit.
573 *
574 * InCompat features are features which old kernels will not understand and so
575 * must not mount.
576 *
577 * Log-InCompat features are for changes to log formats or new transactions that
578 * can't be replayed on older kernels. The fields are set when the filesystem is
579 * mounted, and a clean unmount clears the fields.
580 */
581#define XFS_SB_FEAT_COMPAT_ALL 0
582#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
583static inline bool
584xfs_sb_has_compat_feature(
585 struct xfs_sb *sbp,
586 __uint32_t feature)
587{
588 return (sbp->sb_features_compat & feature) != 0;
589}
590
591#define XFS_SB_FEAT_RO_COMPAT_ALL 0
592#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
593static inline bool
594xfs_sb_has_ro_compat_feature(
595 struct xfs_sb *sbp,
596 __uint32_t feature)
597{
598 return (sbp->sb_features_ro_compat & feature) != 0;
599}
600
601#define XFS_SB_FEAT_INCOMPAT_ALL 0
602#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
603static inline bool
604xfs_sb_has_incompat_feature(
605 struct xfs_sb *sbp,
606 __uint32_t feature)
607{
608 return (sbp->sb_features_incompat & feature) != 0;
609}
610
611#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
612#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
613static inline bool
614xfs_sb_has_incompat_log_feature(
615 struct xfs_sb *sbp,
616 __uint32_t feature)
617{
618 return (sbp->sb_features_log_incompat & feature) != 0;
511} 619}
512 620
513/* 621/*
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
new file mode 100644
index 000000000000..5f234389327c
--- /dev/null
+++ b/fs/xfs/xfs_symlink.c
@@ -0,0 +1,730 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * Copyright (c) 2012-2013 Red Hat, Inc.
4 * All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_types.h"
22#include "xfs_bit.h"
23#include "xfs_log.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h"
29#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_dinode.h"
33#include "xfs_inode.h"
34#include "xfs_inode_item.h"
35#include "xfs_itable.h"
36#include "xfs_ialloc.h"
37#include "xfs_alloc.h"
38#include "xfs_bmap.h"
39#include "xfs_error.h"
40#include "xfs_quota.h"
41#include "xfs_utils.h"
42#include "xfs_trans_space.h"
43#include "xfs_log_priv.h"
44#include "xfs_trace.h"
45#include "xfs_symlink.h"
46#include "xfs_cksum.h"
47#include "xfs_buf_item.h"
48
49
50/*
51 * Each contiguous block has a header, so it is not just a simple pathlen
52 * to FSB conversion.
53 */
54int
55xfs_symlink_blocks(
56 struct xfs_mount *mp,
57 int pathlen)
58{
59 int fsblocks = 0;
60 int len = pathlen;
61
62 do {
63 fsblocks++;
64 len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
65 } while (len > 0);
66
67 ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
68 return fsblocks;
69}
70
71static int
72xfs_symlink_hdr_set(
73 struct xfs_mount *mp,
74 xfs_ino_t ino,
75 uint32_t offset,
76 uint32_t size,
77 struct xfs_buf *bp)
78{
79 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
80
81 if (!xfs_sb_version_hascrc(&mp->m_sb))
82 return 0;
83
84 dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
85 dsl->sl_offset = cpu_to_be32(offset);
86 dsl->sl_bytes = cpu_to_be32(size);
87 uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
88 dsl->sl_owner = cpu_to_be64(ino);
89 dsl->sl_blkno = cpu_to_be64(bp->b_bn);
90 bp->b_ops = &xfs_symlink_buf_ops;
91
92 return sizeof(struct xfs_dsymlink_hdr);
93}
94
95/*
96 * Checking of the symlink header is split into two parts. the verifier does
97 * CRC, location and bounds checking, the unpacking function checks the path
98 * parameters and owner.
99 */
100bool
101xfs_symlink_hdr_ok(
102 struct xfs_mount *mp,
103 xfs_ino_t ino,
104 uint32_t offset,
105 uint32_t size,
106 struct xfs_buf *bp)
107{
108 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
109
110 if (offset != be32_to_cpu(dsl->sl_offset))
111 return false;
112 if (size != be32_to_cpu(dsl->sl_bytes))
113 return false;
114 if (ino != be64_to_cpu(dsl->sl_owner))
115 return false;
116
117 /* ok */
118 return true;
119}
120
121static bool
122xfs_symlink_verify(
123 struct xfs_buf *bp)
124{
125 struct xfs_mount *mp = bp->b_target->bt_mount;
126 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
127
128 if (!xfs_sb_version_hascrc(&mp->m_sb))
129 return false;
130 if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
131 return false;
132 if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
133 return false;
134 if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
135 return false;
136 if (be32_to_cpu(dsl->sl_offset) +
137 be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
138 return false;
139 if (dsl->sl_owner == 0)
140 return false;
141
142 return true;
143}
144
145static void
146xfs_symlink_read_verify(
147 struct xfs_buf *bp)
148{
149 struct xfs_mount *mp = bp->b_target->bt_mount;
150
151 /* no verification of non-crc buffers */
152 if (!xfs_sb_version_hascrc(&mp->m_sb))
153 return;
154
155 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
156 offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
157 !xfs_symlink_verify(bp)) {
158 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
159 xfs_buf_ioerror(bp, EFSCORRUPTED);
160 }
161}
162
163static void
164xfs_symlink_write_verify(
165 struct xfs_buf *bp)
166{
167 struct xfs_mount *mp = bp->b_target->bt_mount;
168 struct xfs_buf_log_item *bip = bp->b_fspriv;
169
170 /* no verification of non-crc buffers */
171 if (!xfs_sb_version_hascrc(&mp->m_sb))
172 return;
173
174 if (!xfs_symlink_verify(bp)) {
175 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
176 xfs_buf_ioerror(bp, EFSCORRUPTED);
177 return;
178 }
179
180 if (bip) {
181 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
182 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
183 }
184 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
185 offsetof(struct xfs_dsymlink_hdr, sl_crc));
186}
187
188const struct xfs_buf_ops xfs_symlink_buf_ops = {
189 .verify_read = xfs_symlink_read_verify,
190 .verify_write = xfs_symlink_write_verify,
191};
192
193void
194xfs_symlink_local_to_remote(
195 struct xfs_trans *tp,
196 struct xfs_buf *bp,
197 struct xfs_inode *ip,
198 struct xfs_ifork *ifp)
199{
200 struct xfs_mount *mp = ip->i_mount;
201 char *buf;
202
203 if (!xfs_sb_version_hascrc(&mp->m_sb)) {
204 bp->b_ops = NULL;
205 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
206 return;
207 }
208
209 /*
210 * As this symlink fits in an inode literal area, it must also fit in
211 * the smallest buffer the filesystem supports.
212 */
213 ASSERT(BBTOB(bp->b_length) >=
214 ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
215
216 bp->b_ops = &xfs_symlink_buf_ops;
217
218 buf = bp->b_addr;
219 buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
220 memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
221}
222
223/* ----- Kernel only functions below ----- */
224STATIC int
225xfs_readlink_bmap(
226 struct xfs_inode *ip,
227 char *link)
228{
229 struct xfs_mount *mp = ip->i_mount;
230 struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
231 struct xfs_buf *bp;
232 xfs_daddr_t d;
233 char *cur_chunk;
234 int pathlen = ip->i_d.di_size;
235 int nmaps = XFS_SYMLINK_MAPS;
236 int byte_cnt;
237 int n;
238 int error = 0;
239 int fsblocks = 0;
240 int offset;
241
242 fsblocks = xfs_symlink_blocks(mp, pathlen);
243 error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
244 if (error)
245 goto out;
246
247 offset = 0;
248 for (n = 0; n < nmaps; n++) {
249 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
250 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
251
252 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
253 &xfs_symlink_buf_ops);
254 if (!bp)
255 return XFS_ERROR(ENOMEM);
256 error = bp->b_error;
257 if (error) {
258 xfs_buf_ioerror_alert(bp, __func__);
259 xfs_buf_relse(bp);
260 goto out;
261 }
262 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
263 if (pathlen < byte_cnt)
264 byte_cnt = pathlen;
265
266 cur_chunk = bp->b_addr;
267 if (xfs_sb_version_hascrc(&mp->m_sb)) {
268 if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset,
269 byte_cnt, bp)) {
270 error = EFSCORRUPTED;
271 xfs_alert(mp,
272"symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
273 offset, byte_cnt, ip->i_ino);
274 xfs_buf_relse(bp);
275 goto out;
276
277 }
278
279 cur_chunk += sizeof(struct xfs_dsymlink_hdr);
280 }
281
282 memcpy(link + offset, bp->b_addr, byte_cnt);
283
284 pathlen -= byte_cnt;
285 offset += byte_cnt;
286
287 xfs_buf_relse(bp);
288 }
289 ASSERT(pathlen == 0);
290
291 link[ip->i_d.di_size] = '\0';
292 error = 0;
293
294 out:
295 return error;
296}
297
298int
299xfs_readlink(
300 struct xfs_inode *ip,
301 char *link)
302{
303 struct xfs_mount *mp = ip->i_mount;
304 xfs_fsize_t pathlen;
305 int error = 0;
306
307 trace_xfs_readlink(ip);
308
309 if (XFS_FORCED_SHUTDOWN(mp))
310 return XFS_ERROR(EIO);
311
312 xfs_ilock(ip, XFS_ILOCK_SHARED);
313
314 pathlen = ip->i_d.di_size;
315 if (!pathlen)
316 goto out;
317
318 if (pathlen < 0 || pathlen > MAXPATHLEN) {
319 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
320 __func__, (unsigned long long) ip->i_ino,
321 (long long) pathlen);
322 ASSERT(0);
323 error = XFS_ERROR(EFSCORRUPTED);
324 goto out;
325 }
326
327
328 if (ip->i_df.if_flags & XFS_IFINLINE) {
329 memcpy(link, ip->i_df.if_u1.if_data, pathlen);
330 link[pathlen] = '\0';
331 } else {
332 error = xfs_readlink_bmap(ip, link);
333 }
334
335 out:
336 xfs_iunlock(ip, XFS_ILOCK_SHARED);
337 return error;
338}
339
/*
 * Create a symbolic link named @link_name in directory @dp whose target is
 * @target_path, returning the new inode in *ipp with a reference held for
 * the caller.
 *
 * A target short enough to fit in the inode literal area is stored inline
 * in the data fork (XFS_DINODE_FMT_LOCAL); longer targets are written to
 * newly allocated remote blocks, each prefixed by a header written via
 * xfs_symlink_hdr_set() and verified by xfs_symlink_buf_ops.
 *
 * Returns 0 on success or a positive XFS errno (EIO, ENAMETOOLONG, EPERM,
 * ENOSPC, ...) on failure.
 */
int
xfs_symlink(
	struct xfs_inode	*dp,
	struct xfs_name		*link_name,
	const char		*target_path,
	umode_t			mode,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_trans	*tp = NULL;
	struct xfs_inode	*ip = NULL;
	int			error = 0;
	int			pathlen;
	struct xfs_bmap_free	free_list;
	xfs_fsblock_t		first_block;
	bool			unlock_dp_on_error = false;
	uint			cancel_flags;
	int			committed;
	xfs_fileoff_t		first_fsb;
	xfs_filblks_t		fs_blocks;
	int			nmaps;
	struct xfs_bmbt_irec	mval[XFS_SYMLINK_MAPS];
	xfs_daddr_t		d;
	const char		*cur_chunk;
	int			byte_cnt;
	int			n;
	xfs_buf_t		*bp;
	prid_t			prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;

	*ipp = NULL;

	trace_xfs_symlink(dp, link_name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Check component lengths of the target path name.
	 */
	pathlen = strlen(target_path);
	if (pathlen >= MAXPATHLEN)	/* total string too long */
		return XFS_ERROR(ENAMETOOLONG);

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * The symlink will fit into the inode data fork?
	 * There can't be any attributes so we get the whole variable part.
	 */
	if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
		fs_blocks = 0;
	else
		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	if (error == ENOSPC && fs_blocks == 0) {
		/*
		 * An inline symlink needs no data blocks; retry the
		 * reservation without a block count in case the filesystem
		 * is merely short of free space.
		 */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	}
	if (error) {
		/* No log reservation was taken, so there is none to release. */
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = true;

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Reserve disk quota : blocks and inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/*
	 * Check for ability to enter directory entry, if no space reserved.
	 */
	error = xfs_dir_canenter(tp, dp, link_name, resblks);
	if (error)
		goto error_return;
	/*
	 * Initialize the bmap freelist prior to calling either
	 * bmapi or the directory create code.
	 */
	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
			       prid, resblks > 0, &ip, NULL);
	if (error) {
		/*
		 * ENOSPC here means no inode was allocated, so there is
		 * nothing extra to tear down; any other error needs the
		 * abort path.
		 */
		if (error == ENOSPC)
			goto error_return;
		goto error1;
	}

	/*
	 * An error after we've joined dp to the transaction will result in the
	 * transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = false;

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	/* The inode allocation consumed part of the block reservation. */
	if (resblks)
		resblks -= XFS_IALLOC_SPACE_RES(mp);
	/*
	 * If the symlink will fit into the inode, write it inline.
	 */
	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
		ip->i_d.di_size = pathlen;

		/*
		 * The inode was initially created in extent format.
		 */
		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
		ip->i_df.if_flags |= XFS_IFINLINE;

		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);

	} else {
		int	offset;

		first_fsb = 0;
		nmaps = XFS_SYMLINK_MAPS;

		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
				  XFS_BMAPI_METADATA, &first_block, resblks,
				  mval, &nmaps, &free_list);
		if (error)
			goto error2;

		if (resblks)
			resblks -= fs_blocks;
		ip->i_d.di_size = pathlen;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		/*
		 * Copy the target path into the allocated extents, one
		 * buffer per mapping.  @offset tracks how far into the
		 * target we are for the per-block header.
		 */
		cur_chunk = target_path;
		offset = 0;
		for (n = 0; n < nmaps; n++) {
			char	*buf;

			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					       BTOBB(byte_cnt), 0);
			if (!bp) {
				error = ENOMEM;
				goto error2;
			}
			bp->b_ops = &xfs_symlink_buf_ops;

			/* Account for the per-block header, if any. */
			byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
			if (pathlen < byte_cnt) {
				byte_cnt = pathlen;
			}

			/* xfs_symlink_hdr_set() returns the header size. */
			buf = bp->b_addr;
			buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
						   byte_cnt, bp);

			memcpy(buf, cur_chunk, byte_cnt);

			cur_chunk += byte_cnt;
			pathlen -= byte_cnt;
			offset += byte_cnt;

			/* Log header plus payload, not the whole buffer. */
			xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
							(char *)bp->b_addr);
		}
	}

	/*
	 * Create the directory entry for the symlink.
	 */
	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto error2;
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		goto error2;
	}
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	*ipp = ip;
	return 0;

 error2:
	IRELE(ip);
 error1:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
 std_return:
	return error;
}
592
593/*
594 * Free a symlink that has blocks associated with it.
595 */
/*
 * Free a symlink that has blocks associated with it.
 *
 * Called with *tpp holding a dirty transaction and @ip locked.  Frees the
 * remote symlink blocks in the incoming transaction, commits it, and hands
 * back a fresh transaction (with an itruncate reservation and @ip rejoined)
 * in *tpp for the caller to continue inode inactivation with.
 *
 * Returns 0 on success or a positive XFS errno.  On error the caller still
 * owns the transaction in *tpp (the commit/dup only happens on success or
 * after the first commit has gone through).
 */
int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[XFS_SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it. Free the
	 * blocks here. We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);

	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction. In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	xfs_bmap_init(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
				mval, &nmaps, 0);
	if (error)
		goto error0;
	/*
	 * Invalidate the block(s). No validation is done.
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		if (!bp) {
			error = ENOMEM;
			goto error1;
		}
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction. This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		/* A failed commit here implies the fs was shut down. */
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(tp->t_ticket);

	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}

	xfs_trans_ijoin(tp, ip, 0);
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	return error;
}
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
new file mode 100644
index 000000000000..b39398d2097c
--- /dev/null
+++ b/fs/xfs/xfs_symlink.h
@@ -0,0 +1,66 @@
1/*
2 * Copyright (c) 2012 Red Hat, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17#ifndef __XFS_SYMLINK_H
18#define __XFS_SYMLINK_H 1
19
20struct xfs_mount;
21struct xfs_trans;
22struct xfs_inode;
23struct xfs_buf;
24struct xfs_ifork;
25struct xfs_name;
26
27#define XFS_SYMLINK_MAGIC 0x58534c4d /* XSLM */
28
29struct xfs_dsymlink_hdr {
30 __be32 sl_magic;
31 __be32 sl_offset;
32 __be32 sl_bytes;
33 __be32 sl_crc;
34 uuid_t sl_uuid;
35 __be64 sl_owner;
36 __be64 sl_blkno;
37 __be64 sl_lsn;
38};
39
40/*
41 * The maximum pathlen is 1024 bytes. Since the minimum file system
42 * blocksize is 512 bytes, we can get a max of 3 extents back from
43 * bmapi when crc headers are taken into account.
44 */
45#define XFS_SYMLINK_MAPS 3
46
47#define XFS_SYMLINK_BUF_SPACE(mp, bufsize) \
48 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
49 sizeof(struct xfs_dsymlink_hdr) : 0))
50
51int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
52
53void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
54 struct xfs_inode *ip, struct xfs_ifork *ifp);
55
56extern const struct xfs_buf_ops xfs_symlink_buf_ops;
57
58#ifdef __KERNEL__
59
60int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
61 const char *target_path, umode_t mode, struct xfs_inode **ipp);
62int xfs_readlink(struct xfs_inode *ip, char *link);
63int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp);
64
65#endif /* __KERNEL__ */
66#endif /* __XFS_SYMLINK_H */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 624bedd81357..b6e3897c1d9f 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,7 +22,6 @@
22#include "xfs_trans.h" 22#include "xfs_trans.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_da_btree.h"
26#include "xfs_bmap_btree.h" 25#include "xfs_bmap_btree.h"
27#include "xfs_alloc_btree.h" 26#include "xfs_alloc_btree.h"
28#include "xfs_ialloc_btree.h" 27#include "xfs_ialloc_btree.h"
@@ -30,6 +29,7 @@
30#include "xfs_inode.h" 29#include "xfs_inode.h"
31#include "xfs_btree.h" 30#include "xfs_btree.h"
32#include "xfs_mount.h" 31#include "xfs_mount.h"
32#include "xfs_da_btree.h"
33#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
34#include "xfs_itable.h" 34#include "xfs_itable.h"
35#include "xfs_alloc.h" 35#include "xfs_alloc.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 16a812977eab..aa4db3307d36 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -619,6 +619,30 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
619 (char *)__entry->caller_ip) 619 (char *)__entry->caller_ip)
620) 620)
621 621
622TRACE_EVENT(xfs_iomap_prealloc_size,
623 TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t blocks, int shift,
624 unsigned int writeio_blocks),
625 TP_ARGS(ip, blocks, shift, writeio_blocks),
626 TP_STRUCT__entry(
627 __field(dev_t, dev)
628 __field(xfs_ino_t, ino)
629 __field(xfs_fsblock_t, blocks)
630 __field(int, shift)
631 __field(unsigned int, writeio_blocks)
632 ),
633 TP_fast_assign(
634 __entry->dev = VFS_I(ip)->i_sb->s_dev;
635 __entry->ino = ip->i_ino;
636 __entry->blocks = blocks;
637 __entry->shift = shift;
638 __entry->writeio_blocks = writeio_blocks;
639 ),
640 TP_printk("dev %d:%d ino 0x%llx prealloc blocks %llu shift %d "
641 "m_writeio_blocks %u",
642 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
643 __entry->blocks, __entry->shift, __entry->writeio_blocks)
644)
645
622#define DEFINE_IREF_EVENT(name) \ 646#define DEFINE_IREF_EVENT(name) \
623DEFINE_EVENT(xfs_iref_class, name, \ 647DEFINE_EVENT(xfs_iref_class, name, \
624 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 648 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 3edf5dbee001..73a5fa457e16 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -659,6 +659,7 @@ xfs_trans_binval(
659 ASSERT(XFS_BUF_ISSTALE(bp)); 659 ASSERT(XFS_BUF_ISSTALE(bp));
660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF)); 661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
662 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
662 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 663 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
663 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); 664 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
664 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 665 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
@@ -671,6 +672,7 @@ xfs_trans_binval(
671 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); 672 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
672 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; 673 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
673 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL; 674 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
675 bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK;
674 for (i = 0; i < bip->bli_format_count; i++) { 676 for (i = 0; i < bip->bli_format_count; i++) {
675 memset(bip->bli_formats[i].blf_data_map, 0, 677 memset(bip->bli_formats[i].blf_data_map, 0,
676 (bip->bli_formats[i].blf_map_size * sizeof(uint))); 678 (bip->bli_formats[i].blf_map_size * sizeof(uint)));
@@ -702,12 +704,13 @@ xfs_trans_inode_buf(
702 ASSERT(atomic_read(&bip->bli_refcount) > 0); 704 ASSERT(atomic_read(&bip->bli_refcount) > 0);
703 705
704 bip->bli_flags |= XFS_BLI_INODE_BUF; 706 bip->bli_flags |= XFS_BLI_INODE_BUF;
707 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
705} 708}
706 709
707/* 710/*
708 * This call is used to indicate that the buffer is going to 711 * This call is used to indicate that the buffer is going to
709 * be staled and was an inode buffer. This means it gets 712 * be staled and was an inode buffer. This means it gets
710 * special processing during unpin - where any inodes 713 * special processing during unpin - where any inodes
711 * associated with the buffer should be removed from ail. 714 * associated with the buffer should be removed from ail.
712 * There is also special processing during recovery, 715 * There is also special processing during recovery,
713 * any replay of the inodes in the buffer needs to be 716 * any replay of the inodes in the buffer needs to be
@@ -726,6 +729,7 @@ xfs_trans_stale_inode_buf(
726 729
727 bip->bli_flags |= XFS_BLI_STALE_INODE; 730 bip->bli_flags |= XFS_BLI_STALE_INODE;
728 bip->bli_item.li_cb = xfs_buf_iodone; 731 bip->bli_item.li_cb = xfs_buf_iodone;
732 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
729} 733}
730 734
731/* 735/*
@@ -749,8 +753,43 @@ xfs_trans_inode_alloc_buf(
749 ASSERT(atomic_read(&bip->bli_refcount) > 0); 753 ASSERT(atomic_read(&bip->bli_refcount) > 0);
750 754
751 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 755 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
756 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
752} 757}
753 758
759/*
760 * Set the type of the buffer for log recovery so that it can correctly identify
761 * and hence attach the correct buffer ops to the buffer after replay.
762 */
763void
764xfs_trans_buf_set_type(
765 struct xfs_trans *tp,
766 struct xfs_buf *bp,
767 enum xfs_blft type)
768{
769 struct xfs_buf_log_item *bip = bp->b_fspriv;
770
771 if (!tp)
772 return;
773
774 ASSERT(bp->b_transp == tp);
775 ASSERT(bip != NULL);
776 ASSERT(atomic_read(&bip->bli_refcount) > 0);
777
778 xfs_blft_to_flags(&bip->__bli_format, type);
779}
780
781void
782xfs_trans_buf_copy_type(
783 struct xfs_buf *dst_bp,
784 struct xfs_buf *src_bp)
785{
786 struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
787 struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
788 enum xfs_blft type;
789
790 type = xfs_blft_from_flags(&sbip->__bli_format);
791 xfs_blft_to_flags(&dbip->__bli_format, type);
792}
754 793
755/* 794/*
756 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of 795 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
@@ -769,14 +808,28 @@ xfs_trans_dquot_buf(
769 xfs_buf_t *bp, 808 xfs_buf_t *bp,
770 uint type) 809 uint type)
771{ 810{
772 xfs_buf_log_item_t *bip = bp->b_fspriv; 811 struct xfs_buf_log_item *bip = bp->b_fspriv;
773 812
774 ASSERT(bp->b_transp == tp);
775 ASSERT(bip != NULL);
776 ASSERT(type == XFS_BLF_UDQUOT_BUF || 813 ASSERT(type == XFS_BLF_UDQUOT_BUF ||
777 type == XFS_BLF_PDQUOT_BUF || 814 type == XFS_BLF_PDQUOT_BUF ||
778 type == XFS_BLF_GDQUOT_BUF); 815 type == XFS_BLF_GDQUOT_BUF);
779 ASSERT(atomic_read(&bip->bli_refcount) > 0);
780 816
781 bip->__bli_format.blf_flags |= type; 817 bip->__bli_format.blf_flags |= type;
818
819 switch (type) {
820 case XFS_BLF_UDQUOT_BUF:
821 type = XFS_BLFT_UDQUOT_BUF;
822 break;
823 case XFS_BLF_PDQUOT_BUF:
824 type = XFS_BLFT_PDQUOT_BUF;
825 break;
826 case XFS_BLF_GDQUOT_BUF:
827 type = XFS_BLFT_GDQUOT_BUF;
828 break;
829 default:
830 type = XFS_BLFT_UNKNOWN_BUF;
831 break;
832 }
833
834 xfs_trans_buf_set_type(tp, bp, type);
782} 835}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 642c2d6e1db1..fec75d023703 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -326,12 +326,12 @@ xfs_trans_dqlockedjoin(
326 */ 326 */
327void 327void
328xfs_trans_apply_dquot_deltas( 328xfs_trans_apply_dquot_deltas(
329 xfs_trans_t *tp) 329 struct xfs_trans *tp)
330{ 330{
331 int i, j; 331 int i, j;
332 xfs_dquot_t *dqp; 332 struct xfs_dquot *dqp;
333 xfs_dqtrx_t *qtrx, *qa; 333 struct xfs_dqtrx *qtrx, *qa;
334 xfs_disk_dquot_t *d; 334 struct xfs_disk_dquot *d;
335 long totalbdelta; 335 long totalbdelta;
336 long totalrtbdelta; 336 long totalrtbdelta;
337 337
@@ -412,7 +412,7 @@ xfs_trans_apply_dquot_deltas(
412 * Start/reset the timer(s) if needed. 412 * Start/reset the timer(s) if needed.
413 */ 413 */
414 if (d->d_id) { 414 if (d->d_id) {
415 xfs_qm_adjust_dqlimits(tp->t_mountp, d); 415 xfs_qm_adjust_dqlimits(tp->t_mountp, dqp);
416 xfs_qm_adjust_dqtimers(tp->t_mountp, d); 416 xfs_qm_adjust_dqtimers(tp->t_mountp, d);
417 } 417 }
418 418
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 77ad74834baa..1501f4fa51a6 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * Copyright (c) 2012 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -48,103 +49,8 @@
48#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
49#include "xfs_trace.h" 50#include "xfs_trace.h"
50#include "xfs_icache.h" 51#include "xfs_icache.h"
52#include "xfs_symlink.h"
51 53
52/*
53 * The maximum pathlen is 1024 bytes. Since the minimum file system
54 * blocksize is 512 bytes, we can get a max of 2 extents back from
55 * bmapi.
56 */
57#define SYMLINK_MAPS 2
58
59STATIC int
60xfs_readlink_bmap(
61 xfs_inode_t *ip,
62 char *link)
63{
64 xfs_mount_t *mp = ip->i_mount;
65 int pathlen = ip->i_d.di_size;
66 int nmaps = SYMLINK_MAPS;
67 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
68 xfs_daddr_t d;
69 int byte_cnt;
70 int n;
71 xfs_buf_t *bp;
72 int error = 0;
73
74 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps,
75 0);
76 if (error)
77 goto out;
78
79 for (n = 0; n < nmaps; n++) {
80 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
81 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
82
83 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, NULL);
84 if (!bp)
85 return XFS_ERROR(ENOMEM);
86 error = bp->b_error;
87 if (error) {
88 xfs_buf_ioerror_alert(bp, __func__);
89 xfs_buf_relse(bp);
90 goto out;
91 }
92 if (pathlen < byte_cnt)
93 byte_cnt = pathlen;
94 pathlen -= byte_cnt;
95
96 memcpy(link, bp->b_addr, byte_cnt);
97 xfs_buf_relse(bp);
98 }
99
100 link[ip->i_d.di_size] = '\0';
101 error = 0;
102
103 out:
104 return error;
105}
106
107int
108xfs_readlink(
109 xfs_inode_t *ip,
110 char *link)
111{
112 xfs_mount_t *mp = ip->i_mount;
113 xfs_fsize_t pathlen;
114 int error = 0;
115
116 trace_xfs_readlink(ip);
117
118 if (XFS_FORCED_SHUTDOWN(mp))
119 return XFS_ERROR(EIO);
120
121 xfs_ilock(ip, XFS_ILOCK_SHARED);
122
123 pathlen = ip->i_d.di_size;
124 if (!pathlen)
125 goto out;
126
127 if (pathlen < 0 || pathlen > MAXPATHLEN) {
128 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
129 __func__, (unsigned long long) ip->i_ino,
130 (long long) pathlen);
131 ASSERT(0);
132 error = XFS_ERROR(EFSCORRUPTED);
133 goto out;
134 }
135
136
137 if (ip->i_df.if_flags & XFS_IFINLINE) {
138 memcpy(link, ip->i_df.if_u1.if_data, pathlen);
139 link[pathlen] = '\0';
140 } else {
141 error = xfs_readlink_bmap(ip, link);
142 }
143
144 out:
145 xfs_iunlock(ip, XFS_ILOCK_SHARED);
146 return error;
147}
148 54
149/* 55/*
150 * This is called by xfs_inactive to free any blocks beyond eof 56 * This is called by xfs_inactive to free any blocks beyond eof
@@ -249,145 +155,6 @@ xfs_free_eofblocks(
249 return error; 155 return error;
250} 156}
251 157
252/*
253 * Free a symlink that has blocks associated with it.
254 */
255STATIC int
256xfs_inactive_symlink_rmt(
257 xfs_inode_t *ip,
258 xfs_trans_t **tpp)
259{
260 xfs_buf_t *bp;
261 int committed;
262 int done;
263 int error;
264 xfs_fsblock_t first_block;
265 xfs_bmap_free_t free_list;
266 int i;
267 xfs_mount_t *mp;
268 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
269 int nmaps;
270 xfs_trans_t *ntp;
271 int size;
272 xfs_trans_t *tp;
273
274 tp = *tpp;
275 mp = ip->i_mount;
276 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
277 /*
278 * We're freeing a symlink that has some
279 * blocks allocated to it. Free the
280 * blocks here. We know that we've got
281 * either 1 or 2 extents and that we can
282 * free them all in one bunmapi call.
283 */
284 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
285
286 /*
287 * Lock the inode, fix the size, and join it to the transaction.
288 * Hold it so in the normal path, we still have it locked for
289 * the second transaction. In the error paths we need it
290 * held so the cancel won't rele it, see below.
291 */
292 size = (int)ip->i_d.di_size;
293 ip->i_d.di_size = 0;
294 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
295 /*
296 * Find the block(s) so we can inval and unmap them.
297 */
298 done = 0;
299 xfs_bmap_init(&free_list, &first_block);
300 nmaps = ARRAY_SIZE(mval);
301 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size),
302 mval, &nmaps, 0);
303 if (error)
304 goto error0;
305 /*
306 * Invalidate the block(s).
307 */
308 for (i = 0; i < nmaps; i++) {
309 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
310 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
311 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
312 if (!bp) {
313 error = ENOMEM;
314 goto error1;
315 }
316 xfs_trans_binval(tp, bp);
317 }
318 /*
319 * Unmap the dead block(s) to the free_list.
320 */
321 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
322 &first_block, &free_list, &done)))
323 goto error1;
324 ASSERT(done);
325 /*
326 * Commit the first transaction. This logs the EFI and the inode.
327 */
328 if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
329 goto error1;
330 /*
331 * The transaction must have been committed, since there were
332 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
333 * The new tp has the extent freeing and EFDs.
334 */
335 ASSERT(committed);
336 /*
337 * The first xact was committed, so add the inode to the new one.
338 * Mark it dirty so it will be logged and moved forward in the log as
339 * part of every commit.
340 */
341 xfs_trans_ijoin(tp, ip, 0);
342 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
343 /*
344 * Get a new, empty transaction to return to our caller.
345 */
346 ntp = xfs_trans_dup(tp);
347 /*
348 * Commit the transaction containing extent freeing and EFDs.
349 * If we get an error on the commit here or on the reserve below,
350 * we need to unlock the inode since the new transaction doesn't
351 * have the inode attached.
352 */
353 error = xfs_trans_commit(tp, 0);
354 tp = ntp;
355 if (error) {
356 ASSERT(XFS_FORCED_SHUTDOWN(mp));
357 goto error0;
358 }
359 /*
360 * transaction commit worked ok so we can drop the extra ticket
361 * reference that we gained in xfs_trans_dup()
362 */
363 xfs_log_ticket_put(tp->t_ticket);
364
365 /*
366 * Remove the memory for extent descriptions (just bookkeeping).
367 */
368 if (ip->i_df.if_bytes)
369 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
370 ASSERT(ip->i_df.if_bytes == 0);
371 /*
372 * Put an itruncate log reservation in the new transaction
373 * for our caller.
374 */
375 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
376 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
377 ASSERT(XFS_FORCED_SHUTDOWN(mp));
378 goto error0;
379 }
380
381 xfs_trans_ijoin(tp, ip, 0);
382 *tpp = tp;
383 return 0;
384
385 error1:
386 xfs_bmap_cancel(&free_list);
387 error0:
388 return error;
389}
390
391int 158int
392xfs_release( 159xfs_release(
393 xfs_inode_t *ip) 160 xfs_inode_t *ip)
@@ -1353,247 +1120,6 @@ xfs_link(
1353} 1120}
1354 1121
1355int 1122int
1356xfs_symlink(
1357 xfs_inode_t *dp,
1358 struct xfs_name *link_name,
1359 const char *target_path,
1360 umode_t mode,
1361 xfs_inode_t **ipp)
1362{
1363 xfs_mount_t *mp = dp->i_mount;
1364 xfs_trans_t *tp;
1365 xfs_inode_t *ip;
1366 int error;
1367 int pathlen;
1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block;
1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags;
1372 int committed;
1373 xfs_fileoff_t first_fsb;
1374 xfs_filblks_t fs_blocks;
1375 int nmaps;
1376 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
1377 xfs_daddr_t d;
1378 const char *cur_chunk;
1379 int byte_cnt;
1380 int n;
1381 xfs_buf_t *bp;
1382 prid_t prid;
1383 struct xfs_dquot *udqp, *gdqp;
1384 uint resblks;
1385
1386 *ipp = NULL;
1387 error = 0;
1388 ip = NULL;
1389 tp = NULL;
1390
1391 trace_xfs_symlink(dp, link_name);
1392
1393 if (XFS_FORCED_SHUTDOWN(mp))
1394 return XFS_ERROR(EIO);
1395
1396 /*
1397 * Check component lengths of the target path name.
1398 */
1399 pathlen = strlen(target_path);
1400 if (pathlen >= MAXPATHLEN) /* total string too long */
1401 return XFS_ERROR(ENAMETOOLONG);
1402
1403 udqp = gdqp = NULL;
1404 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1405 prid = xfs_get_projid(dp);
1406 else
1407 prid = XFS_PROJID_DEFAULT;
1408
1409 /*
1410 * Make sure that we have allocated dquot(s) on disk.
1411 */
1412 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1413 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1414 if (error)
1415 goto std_return;
1416
1417 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
1418 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1419 /*
1420 * The symlink will fit into the inode data fork?
1421 * There can't be any attributes so we get the whole variable part.
1422 */
1423 if (pathlen <= XFS_LITINO(mp))
1424 fs_blocks = 0;
1425 else
1426 fs_blocks = XFS_B_TO_FSB(mp, pathlen);
1427 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
1428 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
1429 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1430 if (error == ENOSPC && fs_blocks == 0) {
1431 resblks = 0;
1432 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
1433 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1434 }
1435 if (error) {
1436 cancel_flags = 0;
1437 goto error_return;
1438 }
1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = true;
1442
1443 /*
1444 * Check whether the directory allows new symlinks or not.
1445 */
1446 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
1447 error = XFS_ERROR(EPERM);
1448 goto error_return;
1449 }
1450
1451 /*
1452 * Reserve disk quota : blocks and inode.
1453 */
1454 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
1455 if (error)
1456 goto error_return;
1457
1458 /*
1459 * Check for ability to enter directory entry, if no space reserved.
1460 */
1461 error = xfs_dir_canenter(tp, dp, link_name, resblks);
1462 if (error)
1463 goto error_return;
1464 /*
1465 * Initialize the bmap freelist prior to calling either
1466 * bmapi or the directory create code.
1467 */
1468 xfs_bmap_init(&free_list, &first_block);
1469
1470 /*
1471 * Allocate an inode for the symlink.
1472 */
1473 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
1474 prid, resblks > 0, &ip, NULL);
1475 if (error) {
1476 if (error == ENOSPC)
1477 goto error_return;
1478 goto error1;
1479 }
1480
1481 /*
1482 * An error after we've joined dp to the transaction will result in the
1483 * transaction cancel unlocking dp so don't do it explicitly in the
1484 * error path.
1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = false;
1488
1489 /*
1490 * Also attach the dquot(s) to it, if applicable.
1491 */
1492 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1493
1494 if (resblks)
1495 resblks -= XFS_IALLOC_SPACE_RES(mp);
1496 /*
1497 * If the symlink will fit into the inode, write it inline.
1498 */
1499 if (pathlen <= XFS_IFORK_DSIZE(ip)) {
1500 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
1501 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
1502 ip->i_d.di_size = pathlen;
1503
1504 /*
1505 * The inode was initially created in extent format.
1506 */
1507 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
1508 ip->i_df.if_flags |= XFS_IFINLINE;
1509
1510 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
1511 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
1512
1513 } else {
1514 first_fsb = 0;
1515 nmaps = SYMLINK_MAPS;
1516
1517 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
1518 XFS_BMAPI_METADATA, &first_block, resblks,
1519 mval, &nmaps, &free_list);
1520 if (error)
1521 goto error2;
1522
1523 if (resblks)
1524 resblks -= fs_blocks;
1525 ip->i_d.di_size = pathlen;
1526 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1527
1528 cur_chunk = target_path;
1529 for (n = 0; n < nmaps; n++) {
1530 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
1531 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
1532 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
1533 BTOBB(byte_cnt), 0);
1534 if (!bp) {
1535 error = ENOMEM;
1536 goto error2;
1537 }
1538 if (pathlen < byte_cnt) {
1539 byte_cnt = pathlen;
1540 }
1541 pathlen -= byte_cnt;
1542
1543 memcpy(bp->b_addr, cur_chunk, byte_cnt);
1544 cur_chunk += byte_cnt;
1545
1546 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
1547 }
1548 }
1549
1550 /*
1551 * Create the directory entry for the symlink.
1552 */
1553 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
1554 &first_block, &free_list, resblks);
1555 if (error)
1556 goto error2;
1557 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1558 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1559
1560 /*
1561 * If this is a synchronous mount, make sure that the
1562 * symlink transaction goes to disk before returning to
1563 * the user.
1564 */
1565 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1566 xfs_trans_set_sync(tp);
1567 }
1568
1569 error = xfs_bmap_finish(&tp, &free_list, &committed);
1570 if (error) {
1571 goto error2;
1572 }
1573 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1574 xfs_qm_dqrele(udqp);
1575 xfs_qm_dqrele(gdqp);
1576
1577 *ipp = ip;
1578 return 0;
1579
1580 error2:
1581 IRELE(ip);
1582 error1:
1583 xfs_bmap_cancel(&free_list);
1584 cancel_flags |= XFS_TRANS_ABORT;
1585 error_return:
1586 xfs_trans_cancel(tp, cancel_flags);
1587 xfs_qm_dqrele(udqp);
1588 xfs_qm_dqrele(gdqp);
1589
1590 if (unlock_dp_on_error)
1591 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1592 std_return:
1593 return error;
1594}
1595
1596int
1597xfs_set_dmattrs( 1123xfs_set_dmattrs(
1598 xfs_inode_t *ip, 1124 xfs_inode_t *ip,
1599 u_int evmask, 1125 u_int evmask,