aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2013-05-21 04:02:08 -0400
committerBen Myers <bpm@sgi.com>2013-05-30 18:26:31 -0400
commit7bc0dc271e494e12be3afd3c6431e5216347c624 (patch)
tree9a33ad50512cc9bbdd13229c6cade51458fae148 /fs
parent634fd5322a3e6ae632dcf5f20eebc0583ba50838 (diff)
xfs: rework remote attr CRCs
Note: this changes the on-disk remote attribute format. I assert that this is OK to do as CRCs are marked experimental and the first kernel it is included in has not yet reached release. Further, the userspace utilities are still evolving and so anyone using this stuff right now is a developer or tester using volatile filesystems for testing this feature. Hence changing the format right now to save longer term pain is the right thing to do. The fundamental change is to move from a header per extent in the attribute to a header per filesystem block in the attribute. This means there are more header blocks and the parsing of the attribute data is slightly more complex, but it has the advantage that we always know the size of the attribute on disk based on the length of the data it contains. This is where the header-per-extent method has problems. We don't know the size of the attribute on disk without first knowing how many extents are used to hold it. And we can't tell from a mapping lookup, either, because remote attributes can be allocated contiguously with other attribute blocks and so there is no obvious way of determining the actual size of the attribute on disk short of walking and mapping buffers. The problem with this approach is that if we map a buffer incorrectly (e.g. we make the last buffer for the attribute data too long), we then get a buffer cache lookup failure when we map it correctly, i.e. we get a size mismatch on lookup. This is not necessarily fatal, but it's a cache coherency problem that can lead to returning the wrong data to userspace or writing the wrong data to disk. And debug kernels will assert fail if this occurs. I found lots of niggly little problems trying to fix this issue on a 4k block size filesystem, finally getting it to pass with lots of fixes. The thing is, 1024 byte filesystems still failed, and it was getting really complex handling all the corner cases that were showing up. 
And there were clearly more that I hadn't found yet. It is complex, fragile code, and if we don't fix it now, it will be complex, fragile code forever more. Hence the simple fix is to add a header to each filesystem block. This gives us the same relationship between the attribute data length and the number of blocks on disk as we have without CRCs - it's a linear mapping and doesn't require us to guess anything. It is simple to implement, too - the remote block count calculated at lookup time can be used by the remote attribute set/get/remove code without modification for both CRC and non-CRC filesystems. The world becomes sane again. Because the copy-in and copy-out now need to iterate over each filesystem block, I moved them into helper functions so we separate the block mapping and buffer manipulations from the attribute data and CRC header manipulations. The code becomes much clearer as a result, and it is a lot easier to understand and debug. It also appears to be much more robust - once it worked on 4k block size filesystems, it has worked without failure on 1k block size filesystems, too. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com> (cherry picked from commit ad1858d77771172e08016890f0eb2faedec3ecee)
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_attr_leaf.c13
-rw-r--r--fs/xfs/xfs_attr_remote.c381
-rw-r--r--fs/xfs/xfs_attr_remote.h10
-rw-r--r--fs/xfs/xfs_buf.c1
4 files changed, 247 insertions, 158 deletions
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 5b03d15b707b..d788302e506a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1412,7 +1412,7 @@ xfs_attr3_leaf_add_work(
1412 name_rmt->valuelen = 0; 1412 name_rmt->valuelen = 0;
1413 name_rmt->valueblk = 0; 1413 name_rmt->valueblk = 0;
1414 args->rmtblkno = 1; 1414 args->rmtblkno = 1;
1415 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); 1415 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
1416 } 1416 }
1417 xfs_trans_log_buf(args->trans, bp, 1417 xfs_trans_log_buf(args->trans, bp,
1418 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), 1418 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -2354,8 +2354,9 @@ xfs_attr3_leaf_lookup_int(
2354 args->index = probe; 2354 args->index = probe;
2355 args->valuelen = be32_to_cpu(name_rmt->valuelen); 2355 args->valuelen = be32_to_cpu(name_rmt->valuelen);
2356 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2356 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2357 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, 2357 args->rmtblkcnt = xfs_attr3_rmt_blocks(
2358 args->valuelen); 2358 args->dp->i_mount,
2359 args->valuelen);
2359 return XFS_ERROR(EEXIST); 2360 return XFS_ERROR(EEXIST);
2360 } 2361 }
2361 } 2362 }
@@ -2406,7 +2407,8 @@ xfs_attr3_leaf_getvalue(
2406 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2407 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2407 valuelen = be32_to_cpu(name_rmt->valuelen); 2408 valuelen = be32_to_cpu(name_rmt->valuelen);
2408 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2409 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2409 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen); 2410 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
2411 valuelen);
2410 if (args->flags & ATTR_KERNOVAL) { 2412 if (args->flags & ATTR_KERNOVAL) {
2411 args->valuelen = valuelen; 2413 args->valuelen = valuelen;
2412 return 0; 2414 return 0;
@@ -2732,7 +2734,8 @@ xfs_attr3_leaf_list_int(
2732 args.valuelen = valuelen; 2734 args.valuelen = valuelen;
2733 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); 2735 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
2734 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 2736 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
2735 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); 2737 args.rmtblkcnt = xfs_attr3_rmt_blocks(
2738 args.dp->i_mount, valuelen);
2736 retval = xfs_attr_rmtval_get(&args); 2739 retval = xfs_attr_rmtval_get(&args);
2737 if (retval) 2740 if (retval)
2738 return retval; 2741 return retval;
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index d8bcb2d742d1..ef6b0c124528 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -47,7 +47,7 @@
47 * Each contiguous block has a header, so it is not just a simple attribute 47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion. 48 * length to FSB conversion.
49 */ 49 */
50static int 50int
51xfs_attr3_rmt_blocks( 51xfs_attr3_rmt_blocks(
52 struct xfs_mount *mp, 52 struct xfs_mount *mp,
53 int attrlen) 53 int attrlen)
@@ -59,12 +59,43 @@ xfs_attr3_rmt_blocks(
59 return XFS_B_TO_FSB(mp, attrlen); 59 return XFS_B_TO_FSB(mp, attrlen);
60} 60}
61 61
62/*
63 * Checking of the remote attribute header is split into two parts. The verifier
64 * does CRC, location and bounds checking, the unpacking function checks the
65 * attribute parameters and owner.
66 */
67static bool
68xfs_attr3_rmt_hdr_ok(
69 struct xfs_mount *mp,
70 void *ptr,
71 xfs_ino_t ino,
72 uint32_t offset,
73 uint32_t size,
74 xfs_daddr_t bno)
75{
76 struct xfs_attr3_rmt_hdr *rmt = ptr;
77
78 if (bno != be64_to_cpu(rmt->rm_blkno))
79 return false;
80 if (offset != be32_to_cpu(rmt->rm_offset))
81 return false;
82 if (size != be32_to_cpu(rmt->rm_bytes))
83 return false;
84 if (ino != be64_to_cpu(rmt->rm_owner))
85 return false;
86
87 /* ok */
88 return true;
89}
90
62static bool 91static bool
63xfs_attr3_rmt_verify( 92xfs_attr3_rmt_verify(
64 struct xfs_buf *bp) 93 struct xfs_mount *mp,
94 void *ptr,
95 int fsbsize,
96 xfs_daddr_t bno)
65{ 97{
66 struct xfs_mount *mp = bp->b_target->bt_mount; 98 struct xfs_attr3_rmt_hdr *rmt = ptr;
67 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
68 99
69 if (!xfs_sb_version_hascrc(&mp->m_sb)) 100 if (!xfs_sb_version_hascrc(&mp->m_sb))
70 return false; 101 return false;
@@ -72,7 +103,9 @@ xfs_attr3_rmt_verify(
72 return false; 103 return false;
73 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) 104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
74 return false; 105 return false;
75 if (bp->b_bn != be64_to_cpu(rmt->rm_blkno)) 106 if (be64_to_cpu(rmt->rm_blkno) != bno)
107 return false;
108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
76 return false; 109 return false;
77 if (be32_to_cpu(rmt->rm_offset) + 110 if (be32_to_cpu(rmt->rm_offset) +
78 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) 111 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
@@ -88,17 +121,40 @@ xfs_attr3_rmt_read_verify(
88 struct xfs_buf *bp) 121 struct xfs_buf *bp)
89{ 122{
90 struct xfs_mount *mp = bp->b_target->bt_mount; 123 struct xfs_mount *mp = bp->b_target->bt_mount;
124 char *ptr;
125 int len;
126 bool corrupt = false;
127 xfs_daddr_t bno;
91 128
92 /* no verification of non-crc buffers */ 129 /* no verification of non-crc buffers */
93 if (!xfs_sb_version_hascrc(&mp->m_sb)) 130 if (!xfs_sb_version_hascrc(&mp->m_sb))
94 return; 131 return;
95 132
96 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 133 ptr = bp->b_addr;
97 XFS_ATTR3_RMT_CRC_OFF) || 134 bno = bp->b_bn;
98 !xfs_attr3_rmt_verify(bp)) { 135 len = BBTOB(bp->b_length);
136 ASSERT(len >= XFS_LBSIZE(mp));
137
138 while (len > 0) {
139 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
140 XFS_ATTR3_RMT_CRC_OFF)) {
141 corrupt = true;
142 break;
143 }
144 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
145 corrupt = true;
146 break;
147 }
148 len -= XFS_LBSIZE(mp);
149 ptr += XFS_LBSIZE(mp);
150 bno += mp->m_bsize;
151 }
152
153 if (corrupt) {
99 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 154 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
100 xfs_buf_ioerror(bp, EFSCORRUPTED); 155 xfs_buf_ioerror(bp, EFSCORRUPTED);
101 } 156 } else
157 ASSERT(len == 0);
102} 158}
103 159
104static void 160static void
@@ -107,23 +163,39 @@ xfs_attr3_rmt_write_verify(
107{ 163{
108 struct xfs_mount *mp = bp->b_target->bt_mount; 164 struct xfs_mount *mp = bp->b_target->bt_mount;
109 struct xfs_buf_log_item *bip = bp->b_fspriv; 165 struct xfs_buf_log_item *bip = bp->b_fspriv;
166 char *ptr;
167 int len;
168 xfs_daddr_t bno;
110 169
111 /* no verification of non-crc buffers */ 170 /* no verification of non-crc buffers */
112 if (!xfs_sb_version_hascrc(&mp->m_sb)) 171 if (!xfs_sb_version_hascrc(&mp->m_sb))
113 return; 172 return;
114 173
115 if (!xfs_attr3_rmt_verify(bp)) { 174 ptr = bp->b_addr;
116 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 175 bno = bp->b_bn;
117 xfs_buf_ioerror(bp, EFSCORRUPTED); 176 len = BBTOB(bp->b_length);
118 return; 177 ASSERT(len >= XFS_LBSIZE(mp));
119 } 178
179 while (len > 0) {
180 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
181 XFS_CORRUPTION_ERROR(__func__,
182 XFS_ERRLEVEL_LOW, mp, bp->b_addr);
183 xfs_buf_ioerror(bp, EFSCORRUPTED);
184 return;
185 }
186 if (bip) {
187 struct xfs_attr3_rmt_hdr *rmt;
120 188
121 if (bip) { 189 rmt = (struct xfs_attr3_rmt_hdr *)ptr;
122 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 190 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
123 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); 191 }
192 xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
193
194 len -= XFS_LBSIZE(mp);
195 ptr += XFS_LBSIZE(mp);
196 bno += mp->m_bsize;
124 } 197 }
125 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 198 ASSERT(len == 0);
126 XFS_ATTR3_RMT_CRC_OFF);
127} 199}
128 200
129const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 201const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
@@ -131,15 +203,16 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
131 .verify_write = xfs_attr3_rmt_write_verify, 203 .verify_write = xfs_attr3_rmt_write_verify,
132}; 204};
133 205
134static int 206STATIC int
135xfs_attr3_rmt_hdr_set( 207xfs_attr3_rmt_hdr_set(
136 struct xfs_mount *mp, 208 struct xfs_mount *mp,
209 void *ptr,
137 xfs_ino_t ino, 210 xfs_ino_t ino,
138 uint32_t offset, 211 uint32_t offset,
139 uint32_t size, 212 uint32_t size,
140 struct xfs_buf *bp) 213 xfs_daddr_t bno)
141{ 214{
142 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 215 struct xfs_attr3_rmt_hdr *rmt = ptr;
143 216
144 if (!xfs_sb_version_hascrc(&mp->m_sb)) 217 if (!xfs_sb_version_hascrc(&mp->m_sb))
145 return 0; 218 return 0;
@@ -149,36 +222,107 @@ xfs_attr3_rmt_hdr_set(
149 rmt->rm_bytes = cpu_to_be32(size); 222 rmt->rm_bytes = cpu_to_be32(size);
150 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); 223 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
151 rmt->rm_owner = cpu_to_be64(ino); 224 rmt->rm_owner = cpu_to_be64(ino);
152 rmt->rm_blkno = cpu_to_be64(bp->b_bn); 225 rmt->rm_blkno = cpu_to_be64(bno);
153 bp->b_ops = &xfs_attr3_rmt_buf_ops;
154 226
155 return sizeof(struct xfs_attr3_rmt_hdr); 227 return sizeof(struct xfs_attr3_rmt_hdr);
156} 228}
157 229
158/* 230/*
159 * Checking of the remote attribute header is split into two parts. the verifier 231 * Helper functions to copy attribute data in and out of the on-disk extents
160 * does CRC, location and bounds checking, the unpacking function checks the
161 * attribute parameters and owner.
162 */ 232 */
163static bool 233STATIC int
164xfs_attr3_rmt_hdr_ok( 234xfs_attr_rmtval_copyout(
165 struct xfs_mount *mp, 235 struct xfs_mount *mp,
166 xfs_ino_t ino, 236 struct xfs_buf *bp,
167 uint32_t offset, 237 xfs_ino_t ino,
168 uint32_t size, 238 int *offset,
169 struct xfs_buf *bp) 239 int *valuelen,
240 char **dst)
170{ 241{
171 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 242 char *src = bp->b_addr;
243 xfs_daddr_t bno = bp->b_bn;
244 int len = BBTOB(bp->b_length);
172 245
173 if (offset != be32_to_cpu(rmt->rm_offset)) 246 ASSERT(len >= XFS_LBSIZE(mp));
174 return false;
175 if (size != be32_to_cpu(rmt->rm_bytes))
176 return false;
177 if (ino != be64_to_cpu(rmt->rm_owner))
178 return false;
179 247
180 /* ok */ 248 while (len > 0 && *valuelen > 0) {
181 return true; 249 int hdr_size = 0;
250 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
251
252 byte_cnt = min_t(int, *valuelen, byte_cnt);
253
254 if (xfs_sb_version_hascrc(&mp->m_sb)) {
255 if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
256 byte_cnt, bno)) {
257 xfs_alert(mp,
258"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
259 bno, *offset, byte_cnt, ino);
260 return EFSCORRUPTED;
261 }
262 hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
263 }
264
265 memcpy(*dst, src + hdr_size, byte_cnt);
266
267 /* roll buffer forwards */
268 len -= XFS_LBSIZE(mp);
269 src += XFS_LBSIZE(mp);
270 bno += mp->m_bsize;
271
272 /* roll attribute data forwards */
273 *valuelen -= byte_cnt;
274 *dst += byte_cnt;
275 *offset += byte_cnt;
276 }
277 return 0;
278}
279
280STATIC void
281xfs_attr_rmtval_copyin(
282 struct xfs_mount *mp,
283 struct xfs_buf *bp,
284 xfs_ino_t ino,
285 int *offset,
286 int *valuelen,
287 char **src)
288{
289 char *dst = bp->b_addr;
290 xfs_daddr_t bno = bp->b_bn;
291 int len = BBTOB(bp->b_length);
292
293 ASSERT(len >= XFS_LBSIZE(mp));
294
295 while (len > 0 && *valuelen > 0) {
296 int hdr_size;
297 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
298
299 byte_cnt = min(*valuelen, byte_cnt);
300 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
301 byte_cnt, bno);
302
303 memcpy(dst + hdr_size, *src, byte_cnt);
304
305 /*
306 * If this is the last block, zero the remainder of it.
307 * Check that we are actually the last block, too.
308 */
309 if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
310 ASSERT(*valuelen - byte_cnt == 0);
311 ASSERT(len == XFS_LBSIZE(mp));
312 memset(dst + hdr_size + byte_cnt, 0,
313 XFS_LBSIZE(mp) - hdr_size - byte_cnt);
314 }
315
316 /* roll buffer forwards */
317 len -= XFS_LBSIZE(mp);
318 dst += XFS_LBSIZE(mp);
319 bno += mp->m_bsize;
320
321 /* roll attribute data forwards */
322 *valuelen -= byte_cnt;
323 *src += byte_cnt;
324 *offset += byte_cnt;
325 }
182} 326}
183 327
184/* 328/*
@@ -192,13 +336,12 @@ xfs_attr_rmtval_get(
192 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 336 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
193 struct xfs_mount *mp = args->dp->i_mount; 337 struct xfs_mount *mp = args->dp->i_mount;
194 struct xfs_buf *bp; 338 struct xfs_buf *bp;
195 xfs_daddr_t dblkno;
196 xfs_dablk_t lblkno = args->rmtblkno; 339 xfs_dablk_t lblkno = args->rmtblkno;
197 void *dst = args->value; 340 char *dst = args->value;
198 int valuelen = args->valuelen; 341 int valuelen = args->valuelen;
199 int nmap; 342 int nmap;
200 int error; 343 int error;
201 int blkcnt; 344 int blkcnt = args->rmtblkcnt;
202 int i; 345 int i;
203 int offset = 0; 346 int offset = 0;
204 347
@@ -208,7 +351,6 @@ xfs_attr_rmtval_get(
208 351
209 while (valuelen > 0) { 352 while (valuelen > 0) {
210 nmap = ATTR_RMTVALUE_MAPSIZE; 353 nmap = ATTR_RMTVALUE_MAPSIZE;
211 blkcnt = xfs_attr3_rmt_blocks(mp, valuelen);
212 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 354 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
213 blkcnt, map, &nmap, 355 blkcnt, map, &nmap,
214 XFS_BMAPI_ATTRFORK); 356 XFS_BMAPI_ATTRFORK);
@@ -217,45 +359,29 @@ xfs_attr_rmtval_get(
217 ASSERT(nmap >= 1); 359 ASSERT(nmap >= 1);
218 360
219 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 361 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
220 int byte_cnt; 362 xfs_daddr_t dblkno;
221 char *src; 363 int dblkcnt;
222 364
223 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 365 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
224 (map[i].br_startblock != HOLESTARTBLOCK)); 366 (map[i].br_startblock != HOLESTARTBLOCK));
225 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 367 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
226 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 368 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
227 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 369 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
228 dblkno, blkcnt, 0, &bp, 370 dblkno, dblkcnt, 0, &bp,
229 &xfs_attr3_rmt_buf_ops); 371 &xfs_attr3_rmt_buf_ops);
230 if (error) 372 if (error)
231 return error; 373 return error;
232 374
233 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, BBTOB(bp->b_length)); 375 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
234 byte_cnt = min_t(int, valuelen, byte_cnt); 376 &offset, &valuelen,
235 377 &dst);
236 src = bp->b_addr;
237 if (xfs_sb_version_hascrc(&mp->m_sb)) {
238 if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
239 offset, byte_cnt, bp)) {
240 xfs_alert(mp,
241"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
242 offset, byte_cnt, args->dp->i_ino);
243 xfs_buf_relse(bp);
244 return EFSCORRUPTED;
245
246 }
247
248 src += sizeof(struct xfs_attr3_rmt_hdr);
249 }
250
251 memcpy(dst, src, byte_cnt);
252 xfs_buf_relse(bp); 378 xfs_buf_relse(bp);
379 if (error)
380 return error;
253 381
254 offset += byte_cnt; 382 /* roll attribute extent map forwards */
255 dst += byte_cnt;
256 valuelen -= byte_cnt;
257
258 lblkno += map[i].br_blockcount; 383 lblkno += map[i].br_blockcount;
384 blkcnt -= map[i].br_blockcount;
259 } 385 }
260 } 386 }
261 ASSERT(valuelen == 0); 387 ASSERT(valuelen == 0);
@@ -273,17 +399,13 @@ xfs_attr_rmtval_set(
273 struct xfs_inode *dp = args->dp; 399 struct xfs_inode *dp = args->dp;
274 struct xfs_mount *mp = dp->i_mount; 400 struct xfs_mount *mp = dp->i_mount;
275 struct xfs_bmbt_irec map; 401 struct xfs_bmbt_irec map;
276 struct xfs_buf *bp;
277 xfs_daddr_t dblkno;
278 xfs_dablk_t lblkno; 402 xfs_dablk_t lblkno;
279 xfs_fileoff_t lfileoff = 0; 403 xfs_fileoff_t lfileoff = 0;
280 void *src = args->value; 404 char *src = args->value;
281 int blkcnt; 405 int blkcnt;
282 int valuelen; 406 int valuelen;
283 int nmap; 407 int nmap;
284 int error; 408 int error;
285 int hdrcnt = 0;
286 bool crcs = xfs_sb_version_hascrc(&mp->m_sb);
287 int offset = 0; 409 int offset = 0;
288 410
289 trace_xfs_attr_rmtval_set(args); 411 trace_xfs_attr_rmtval_set(args);
@@ -292,21 +414,14 @@ xfs_attr_rmtval_set(
292 * Find a "hole" in the attribute address space large enough for 414 * Find a "hole" in the attribute address space large enough for
293 * us to drop the new attribute's value into. Because CRC enable 415 * us to drop the new attribute's value into. Because CRC enable
294 * attributes have headers, we can't just do a straight byte to FSB 416 * attributes have headers, we can't just do a straight byte to FSB
295 * conversion. We calculate the worst case block count in this case 417 * conversion and have to take the header space into account.
296 * and we may not need that many, so we have to handle this when
297 * allocating the blocks below.
298 */ 418 */
299 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); 419 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
300
301 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 420 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
302 XFS_ATTR_FORK); 421 XFS_ATTR_FORK);
303 if (error) 422 if (error)
304 return error; 423 return error;
305 424
306 /* Start with the attribute data. We'll allocate the rest afterwards. */
307 if (crcs)
308 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
309
310 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 425 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
311 args->rmtblkcnt = blkcnt; 426 args->rmtblkcnt = blkcnt;
312 427
@@ -349,31 +464,6 @@ xfs_attr_rmtval_set(
349 (map.br_startblock != HOLESTARTBLOCK)); 464 (map.br_startblock != HOLESTARTBLOCK));
350 lblkno += map.br_blockcount; 465 lblkno += map.br_blockcount;
351 blkcnt -= map.br_blockcount; 466 blkcnt -= map.br_blockcount;
352 hdrcnt++;
353
354 /*
355 * If we have enough blocks for the attribute data, calculate
356 * how many extra blocks we need for headers. We might run
357 * through this multiple times in the case that the additional
358 * headers in the blocks needed for the data fragments spills
359 * into requiring more blocks. e.g. for 512 byte blocks, we'll
360 * spill for another block every 9 headers we require in this
361 * loop.
362 *
363 * Note that this can result in contiguous allocation of blocks,
364 * so we don't use all the space we allocate for headers as we
365 * have one less header for each contiguous allocation that
366 * occurs in the map/write loop below.
367 */
368 if (crcs && blkcnt == 0) {
369 int total_len;
370
371 total_len = args->valuelen +
372 hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
373 blkcnt = XFS_B_TO_FSB(mp, total_len);
374 blkcnt -= args->rmtblkcnt;
375 args->rmtblkcnt += blkcnt;
376 }
377 467
378 /* 468 /*
379 * Start the next trans in the chain. 469 * Start the next trans in the chain.
@@ -390,17 +480,15 @@ xfs_attr_rmtval_set(
390 * the INCOMPLETE flag. 480 * the INCOMPLETE flag.
391 */ 481 */
392 lblkno = args->rmtblkno; 482 lblkno = args->rmtblkno;
393 valuelen = args->valuelen;
394 blkcnt = args->rmtblkcnt; 483 blkcnt = args->rmtblkcnt;
484 valuelen = args->valuelen;
395 while (valuelen > 0) { 485 while (valuelen > 0) {
396 int byte_cnt; 486 struct xfs_buf *bp;
397 int hdr_size; 487 xfs_daddr_t dblkno;
398 int dblkcnt; 488 int dblkcnt;
399 char *buf; 489
490 ASSERT(blkcnt > 0);
400 491
401 /*
402 * Try to remember where we decided to put the value.
403 */
404 xfs_bmap_init(args->flist, args->firstblock); 492 xfs_bmap_init(args->flist, args->firstblock);
405 nmap = 1; 493 nmap = 1;
406 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 494 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
@@ -419,29 +507,17 @@ xfs_attr_rmtval_set(
419 if (!bp) 507 if (!bp)
420 return ENOMEM; 508 return ENOMEM;
421 bp->b_ops = &xfs_attr3_rmt_buf_ops; 509 bp->b_ops = &xfs_attr3_rmt_buf_ops;
422 buf = bp->b_addr;
423
424 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, BBTOB(bp->b_length));
425 byte_cnt = min_t(int, valuelen, byte_cnt);
426 hdr_size = xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
427 byte_cnt, bp);
428 ASSERT(hdr_size + byte_cnt <= BBTOB(bp->b_length));
429 510
430 memcpy(buf + hdr_size, src, byte_cnt); 511 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
431 512 &valuelen, &src);
432 if (byte_cnt + hdr_size < BBTOB(bp->b_length))
433 xfs_buf_zero(bp, byte_cnt + hdr_size,
434 BBTOB(bp->b_length) - byte_cnt - hdr_size);
435 513
436 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 514 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
437 xfs_buf_relse(bp); 515 xfs_buf_relse(bp);
438 if (error) 516 if (error)
439 return error; 517 return error;
440 518
441 src += byte_cnt;
442 valuelen -= byte_cnt;
443 offset += byte_cnt;
444 519
520 /* roll attribute extent map forwards */
445 lblkno += map.br_blockcount; 521 lblkno += map.br_blockcount;
446 blkcnt -= map.br_blockcount; 522 blkcnt -= map.br_blockcount;
447 } 523 }
@@ -454,19 +530,17 @@ xfs_attr_rmtval_set(
454 * out-of-line buffer that it is stored on. 530 * out-of-line buffer that it is stored on.
455 */ 531 */
456int 532int
457xfs_attr_rmtval_remove(xfs_da_args_t *args) 533xfs_attr_rmtval_remove(
534 struct xfs_da_args *args)
458{ 535{
459 xfs_mount_t *mp; 536 struct xfs_mount *mp = args->dp->i_mount;
460 xfs_bmbt_irec_t map; 537 xfs_dablk_t lblkno;
461 xfs_buf_t *bp; 538 int blkcnt;
462 xfs_daddr_t dblkno; 539 int error;
463 xfs_dablk_t lblkno; 540 int done;
464 int valuelen, blkcnt, nmap, error, done, committed;
465 541
466 trace_xfs_attr_rmtval_remove(args); 542 trace_xfs_attr_rmtval_remove(args);
467 543
468 mp = args->dp->i_mount;
469
470 /* 544 /*
471 * Roll through the "value", invalidating the attribute value's blocks. 545 * Roll through the "value", invalidating the attribute value's blocks.
472 * Note that args->rmtblkcnt is the minimum number of data blocks we'll 546 * Note that args->rmtblkcnt is the minimum number of data blocks we'll
@@ -476,10 +550,13 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
476 * lookups. 550 * lookups.
477 */ 551 */
478 lblkno = args->rmtblkno; 552 lblkno = args->rmtblkno;
479 valuelen = args->valuelen; 553 blkcnt = args->rmtblkcnt;
480 blkcnt = xfs_attr3_rmt_blocks(mp, valuelen); 554 while (blkcnt > 0) {
481 while (valuelen > 0) { 555 struct xfs_bmbt_irec map;
482 int dblkcnt; 556 struct xfs_buf *bp;
557 xfs_daddr_t dblkno;
558 int dblkcnt;
559 int nmap;
483 560
484 /* 561 /*
485 * Try to remember where we decided to put the value. 562 * Try to remember where we decided to put the value.
@@ -506,21 +583,19 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
506 bp = NULL; 583 bp = NULL;
507 } 584 }
508 585
509 valuelen -= XFS_ATTR3_RMT_BUF_SPACE(mp,
510 XFS_FSB_TO_B(mp, map.br_blockcount));
511
512 lblkno += map.br_blockcount; 586 lblkno += map.br_blockcount;
513 blkcnt -= map.br_blockcount; 587 blkcnt -= map.br_blockcount;
514 blkcnt = max(blkcnt, xfs_attr3_rmt_blocks(mp, valuelen));
515 } 588 }
516 589
517 /* 590 /*
518 * Keep de-allocating extents until the remote-value region is gone. 591 * Keep de-allocating extents until the remote-value region is gone.
519 */ 592 */
520 blkcnt = lblkno - args->rmtblkno;
521 lblkno = args->rmtblkno; 593 lblkno = args->rmtblkno;
594 blkcnt = args->rmtblkcnt;
522 done = 0; 595 done = 0;
523 while (!done) { 596 while (!done) {
597 int committed;
598
524 xfs_bmap_init(args->flist, args->firstblock); 599 xfs_bmap_init(args->flist, args->firstblock);
525 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 600 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
526 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 601 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index c7cca60a062a..92a8fd7977cc 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -20,6 +20,14 @@
20 20
21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ 21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
22 22
23/*
24 * There is one of these headers per filesystem block in a remote attribute.
25 * This is done to ensure there is a 1:1 mapping between the attribute value
26 * length and the number of blocks needed to store the attribute. This makes the
27 * verification of a buffer a little more complex, but greatly simplifies the
28 * allocation, reading and writing of these attributes as we don't have to guess
29 * the number of blocks needed to store the attribute data.
30 */
23struct xfs_attr3_rmt_hdr { 31struct xfs_attr3_rmt_hdr {
24 __be32 rm_magic; 32 __be32 rm_magic;
25 __be32 rm_offset; 33 __be32 rm_offset;
@@ -39,6 +47,8 @@ struct xfs_attr3_rmt_hdr {
39 47
40extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; 48extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
41 49
50int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
51
42int xfs_attr_rmtval_get(struct xfs_da_args *args); 52int xfs_attr_rmtval_get(struct xfs_da_args *args);
43int xfs_attr_rmtval_set(struct xfs_da_args *args); 53int xfs_attr_rmtval_set(struct xfs_da_args *args);
44int xfs_attr_rmtval_remove(struct xfs_da_args *args); 54int xfs_attr_rmtval_remove(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 0d2554299688..1b2472a46e46 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -513,6 +513,7 @@ _xfs_buf_find(
513 xfs_alert(btp->bt_mount, 513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs); 515 __func__, blkno, eofs);
516 WARN_ON(1);
516 return NULL; 517 return NULL;
517 } 518 }
518 519