aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_attr_remote.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2013-04-03 01:11:28 -0400
committerBen Myers <bpm@sgi.com>2013-04-27 13:58:53 -0400
commitd2e448d5fdebdcda93ed171339a3d864f65c227e (patch)
tree1d0681b4bf9fd79a38de7f1a1b3328e9dd89db74 /fs/xfs/xfs_attr_remote.c
parent95920cd6ce1c9cd8d3a0f639a674aa26c974ed57 (diff)
xfs: add CRC protection to remote attributes
There are two ways of doing this - the first is to add a CRC to the remote attribute entry in the attribute block. The second is to treat them similarly to the remote symlink, where each fragment has its own header and identifies the fragment's location in the attribute. The problem with the CRC in the remote attr entry is that we cannot identify the owner of the metadata from the metadata blocks themselves, or where the blocks fit into the remote attribute. The downside to this approach is that we never know whether the attribute has been read from disk or not and so we have to verify it every time it is read, and we must calculate it during the create transaction and log it. We do not log CRCs for any other metadata, and so this creates a unique set of coherency problems that, in general, are best avoided. Adding an identifying header to each allocated block allows us to identify each fragment and where in the attribute it is located. It enables us to rebuild the remote attribute from just the raw blocks containing the attribute. It also allows us to do per-block CRC verification at IO time rather than during the transaction context that creates it or every time it is read into a user buffer. Hence it avoids all the problems that an external, logged CRC has, and provides all the benefits of self-identifying metadata. The only complexity is that we have to add a header per fragment, and we don't know how many fragments will be needed prior to allocations. If we take the symlink example, the header is 56 bytes and hence for a 4k block size filesystem, in the worst case 16 headers require 1 extra block for the 64k attribute data. For 512 byte filesystems the worst case is an extra block for every 9 fragments (i.e. 16 extra blocks in the worst case). This will be very rare and so it's not really a major concern.
Because allocation is done in two steps - the first finds a hole large enough in the attribute file, the second does the allocation - we only need to find a hole big enough for a worst case allocation. We only need to allocate enough extra blocks for the number of headers required by the fragments, and we can calculate that as we go.... Hence it really only makes sense to use the same model as for symlinks - it doesn't add that much complexity, does not require an attribute tree format change, and does not require logging calculated CRC values. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_attr_remote.c')
-rw-r--r--fs/xfs/xfs_attr_remote.c324
1 files changed, 273 insertions, 51 deletions
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index d0d67e935262..53da46b46c2f 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -37,63 +38,232 @@
37#include "xfs_attr_remote.h" 38#include "xfs_attr_remote.h"
38#include "xfs_trans_space.h" 39#include "xfs_trans_space.h"
39#include "xfs_trace.h" 40#include "xfs_trace.h"
40 41#include "xfs_cksum.h"
42#include "xfs_buf_item.h"
41 43
42#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 44#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
43 45
44/* 46/*
47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion.
49 */
50static int
51xfs_attr3_rmt_blocks(
52 struct xfs_mount *mp,
53 int attrlen)
54{
55 int fsblocks = 0;
56 int len = attrlen;
57
58 do {
59 fsblocks++;
60 len -= XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
61 } while (len > 0);
62
63 return fsblocks;
64}
65
66static bool
67xfs_attr3_rmt_verify(
68 struct xfs_buf *bp)
69{
70 struct xfs_mount *mp = bp->b_target->bt_mount;
71 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
72
73 if (!xfs_sb_version_hascrc(&mp->m_sb))
74 return false;
75 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
76 return false;
77 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
78 return false;
79 if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
80 return false;
81 if (be32_to_cpu(rmt->rm_offset) +
82 be32_to_cpu(rmt->rm_bytes) >= MAXPATHLEN)
83 return false;
84 if (rmt->rm_owner == 0)
85 return false;
86
87 return true;
88}
89
90static void
91xfs_attr3_rmt_read_verify(
92 struct xfs_buf *bp)
93{
94 struct xfs_mount *mp = bp->b_target->bt_mount;
95
96 /* no verification of non-crc buffers */
97 if (!xfs_sb_version_hascrc(&mp->m_sb))
98 return;
99
100 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
101 XFS_ATTR3_RMT_CRC_OFF) ||
102 !xfs_attr3_rmt_verify(bp)) {
103 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
104 xfs_buf_ioerror(bp, EFSCORRUPTED);
105 }
106}
107
108static void
109xfs_attr3_rmt_write_verify(
110 struct xfs_buf *bp)
111{
112 struct xfs_mount *mp = bp->b_target->bt_mount;
113 struct xfs_buf_log_item *bip = bp->b_fspriv;
114
115 /* no verification of non-crc buffers */
116 if (!xfs_sb_version_hascrc(&mp->m_sb))
117 return;
118
119 if (!xfs_attr3_rmt_verify(bp)) {
120 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
121 xfs_buf_ioerror(bp, EFSCORRUPTED);
122 return;
123 }
124
125 if (bip) {
126 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
127 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
128 }
129 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
130 XFS_ATTR3_RMT_CRC_OFF);
131}
132
133const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
134 .verify_read = xfs_attr3_rmt_read_verify,
135 .verify_write = xfs_attr3_rmt_write_verify,
136};
137
138static int
139xfs_attr3_rmt_hdr_set(
140 struct xfs_mount *mp,
141 xfs_ino_t ino,
142 uint32_t offset,
143 uint32_t size,
144 struct xfs_buf *bp)
145{
146 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
147
148 if (!xfs_sb_version_hascrc(&mp->m_sb))
149 return 0;
150
151 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
152 rmt->rm_offset = cpu_to_be32(offset);
153 rmt->rm_bytes = cpu_to_be32(size);
154 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
155 rmt->rm_owner = cpu_to_be64(ino);
156 rmt->rm_blkno = cpu_to_be64(bp->b_bn);
157 bp->b_ops = &xfs_attr3_rmt_buf_ops;
158
159 return sizeof(struct xfs_attr3_rmt_hdr);
160}
161
162/*
163 * Checking of the remote attribute header is split into two parts. the verifier
164 * does CRC, location and bounds checking, the unpacking function checks the
165 * attribute parameters and owner.
166 */
167static bool
168xfs_attr3_rmt_hdr_ok(
169 struct xfs_mount *mp,
170 xfs_ino_t ino,
171 uint32_t offset,
172 uint32_t size,
173 struct xfs_buf *bp)
174{
175 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
176
177 if (offset != be32_to_cpu(rmt->rm_offset))
178 return false;
179 if (size != be32_to_cpu(rmt->rm_bytes))
180 return false;
181 if (ino != be64_to_cpu(rmt->rm_owner))
182 return false;
183
184 /* ok */
185 return true;
186
187}
188
189/*
45 * Read the value associated with an attribute from the out-of-line buffer 190 * Read the value associated with an attribute from the out-of-line buffer
46 * that we stored it in. 191 * that we stored it in.
47 */ 192 */
48int 193int
49xfs_attr_rmtval_get(xfs_da_args_t *args) 194xfs_attr_rmtval_get(
195 struct xfs_da_args *args)
50{ 196{
51 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; 197 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
52 xfs_mount_t *mp; 198 struct xfs_mount *mp = args->dp->i_mount;
53 xfs_daddr_t dblkno; 199 struct xfs_buf *bp;
54 void *dst; 200 xfs_daddr_t dblkno;
55 xfs_buf_t *bp; 201 xfs_dablk_t lblkno = args->rmtblkno;
56 int nmap, error, tmp, valuelen, blkcnt, i; 202 void *dst = args->value;
57 xfs_dablk_t lblkno; 203 int valuelen = args->valuelen;
204 int nmap;
205 int error;
206 int blkcnt;
207 int i;
208 int offset = 0;
58 209
59 trace_xfs_attr_rmtval_get(args); 210 trace_xfs_attr_rmtval_get(args);
60 211
61 ASSERT(!(args->flags & ATTR_KERNOVAL)); 212 ASSERT(!(args->flags & ATTR_KERNOVAL));
62 213
63 mp = args->dp->i_mount;
64 dst = args->value;
65 valuelen = args->valuelen;
66 lblkno = args->rmtblkno;
67 while (valuelen > 0) { 214 while (valuelen > 0) {
68 nmap = ATTR_RMTVALUE_MAPSIZE; 215 nmap = ATTR_RMTVALUE_MAPSIZE;
69 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 216 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
70 args->rmtblkcnt, map, &nmap, 217 args->rmtblkcnt, map, &nmap,
71 XFS_BMAPI_ATTRFORK); 218 XFS_BMAPI_ATTRFORK);
72 if (error) 219 if (error)
73 return(error); 220 return error;
74 ASSERT(nmap >= 1); 221 ASSERT(nmap >= 1);
75 222
76 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 223 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
224 int byte_cnt;
225 char *src;
226
77 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 227 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
78 (map[i].br_startblock != HOLESTARTBLOCK)); 228 (map[i].br_startblock != HOLESTARTBLOCK));
79 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 229 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
80 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 230 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
81 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 231 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
82 dblkno, blkcnt, 0, &bp, NULL); 232 dblkno, blkcnt, 0, &bp,
233 &xfs_attr3_rmt_buf_ops);
83 if (error) 234 if (error)
84 return(error); 235 return error;
236
237 byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length));
238 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
85 239
86 tmp = min_t(int, valuelen, BBTOB(bp->b_length)); 240 src = bp->b_addr;
87 xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ); 241 if (xfs_sb_version_hascrc(&mp->m_sb)) {
242 if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
243 offset, byte_cnt, bp)) {
244 xfs_alert(mp,
245"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
246 offset, byte_cnt, args->dp->i_ino);
247 xfs_buf_relse(bp);
248 return EFSCORRUPTED;
249
250 }
251
252 src += sizeof(struct xfs_attr3_rmt_hdr);
253 }
254
255 memcpy(dst, src, byte_cnt);
88 xfs_buf_relse(bp); 256 xfs_buf_relse(bp);
89 dst += tmp; 257
90 valuelen -= tmp; 258 offset += byte_cnt;
259 dst += byte_cnt;
260 valuelen -= byte_cnt;
91 261
92 lblkno += map[i].br_blockcount; 262 lblkno += map[i].br_blockcount;
93 } 263 }
94 } 264 }
95 ASSERT(valuelen == 0); 265 ASSERT(valuelen == 0);
96 return(0); 266 return 0;
97} 267}
98 268
99/* 269/*
@@ -101,35 +271,49 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
101 * that we have defined for it. 271 * that we have defined for it.
102 */ 272 */
103int 273int
104xfs_attr_rmtval_set(xfs_da_args_t *args) 274xfs_attr_rmtval_set(
275 struct xfs_da_args *args)
105{ 276{
106 xfs_mount_t *mp; 277 struct xfs_inode *dp = args->dp;
107 xfs_fileoff_t lfileoff; 278 struct xfs_mount *mp = dp->i_mount;
108 xfs_inode_t *dp; 279 struct xfs_bmbt_irec map;
109 xfs_bmbt_irec_t map; 280 struct xfs_buf *bp;
110 xfs_daddr_t dblkno; 281 xfs_daddr_t dblkno;
111 void *src; 282 xfs_dablk_t lblkno;
112 xfs_buf_t *bp; 283 xfs_fileoff_t lfileoff = 0;
113 xfs_dablk_t lblkno; 284 void *src = args->value;
114 int blkcnt, valuelen, nmap, error, tmp, committed; 285 int blkcnt;
286 int valuelen;
287 int nmap;
288 int error;
289 int hdrcnt = 0;
290 bool crcs = xfs_sb_version_hascrc(&mp->m_sb);
291 int offset = 0;
115 292
116 trace_xfs_attr_rmtval_set(args); 293 trace_xfs_attr_rmtval_set(args);
117 294
118 dp = args->dp;
119 mp = dp->i_mount;
120 src = args->value;
121
122 /* 295 /*
123 * Find a "hole" in the attribute address space large enough for 296 * Find a "hole" in the attribute address space large enough for
124 * us to drop the new attribute's value into. 297 * us to drop the new attribute's value into. Because CRC enable
298 * attributes have headers, we can't just do a straight byte to FSB
299 * conversion. We calculate the worst case block count in this case
300 * and we may not need that many, so we have to handle this when
301 * allocating the blocks below.
125 */ 302 */
126 blkcnt = XFS_B_TO_FSB(mp, args->valuelen); 303 if (!crcs)
127 lfileoff = 0; 304 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
305 else
306 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
307
128 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 308 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
129 XFS_ATTR_FORK); 309 XFS_ATTR_FORK);
130 if (error) { 310 if (error)
131 return(error); 311 return error;
132 } 312
313 /* Start with the attribute data. We'll allocate the rest afterwards. */
314 if (crcs)
315 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
316
133 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 317 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
134 args->rmtblkcnt = blkcnt; 318 args->rmtblkcnt = blkcnt;
135 319
@@ -137,6 +321,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
137 * Roll through the "value", allocating blocks on disk as required. 321 * Roll through the "value", allocating blocks on disk as required.
138 */ 322 */
139 while (blkcnt > 0) { 323 while (blkcnt > 0) {
324 int committed;
325
140 /* 326 /*
141 * Allocate a single extent, up to the size of the value. 327 * Allocate a single extent, up to the size of the value.
142 */ 328 */
@@ -170,6 +356,27 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
170 (map.br_startblock != HOLESTARTBLOCK)); 356 (map.br_startblock != HOLESTARTBLOCK));
171 lblkno += map.br_blockcount; 357 lblkno += map.br_blockcount;
172 blkcnt -= map.br_blockcount; 358 blkcnt -= map.br_blockcount;
359 hdrcnt++;
360
361 /*
362 * If we have enough blocks for the attribute data, calculate
363 * how many extra blocks we need for headers. We might run
364 * through this multiple times in the case that the additional
365 * headers in the blocks needed for the data fragments spills
366 * into requiring more blocks. e.g. for 512 byte blocks, we'll
367 * spill for another block every 9 headers we require in this
368 * loop.
369 */
370
371 if (crcs && blkcnt == 0) {
372 int total_len;
373
374 total_len = args->valuelen +
375 hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
376 blkcnt = XFS_B_TO_FSB(mp, total_len);
377 blkcnt -= args->rmtblkcnt;
378 args->rmtblkcnt += blkcnt;
379 }
173 380
174 /* 381 /*
175 * Start the next trans in the chain. 382 * Start the next trans in the chain.
@@ -188,7 +395,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
188 lblkno = args->rmtblkno; 395 lblkno = args->rmtblkno;
189 valuelen = args->valuelen; 396 valuelen = args->valuelen;
190 while (valuelen > 0) { 397 while (valuelen > 0) {
191 int buflen; 398 int byte_cnt;
399 char *buf;
192 400
193 /* 401 /*
194 * Try to remember where we decided to put the value. 402 * Try to remember where we decided to put the value.
@@ -210,24 +418,38 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
210 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0); 418 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
211 if (!bp) 419 if (!bp)
212 return ENOMEM; 420 return ENOMEM;
421 bp->b_ops = &xfs_attr3_rmt_buf_ops;
422
423 byte_cnt = BBTOB(bp->b_length);
424 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
425 if (valuelen < byte_cnt) {
426 byte_cnt = valuelen;
427 }
428
429 buf = bp->b_addr;
430 buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
431 byte_cnt, bp);
432 memcpy(buf, src, byte_cnt);
213 433
214 buflen = BBTOB(bp->b_length); 434 if (byte_cnt < BBTOB(bp->b_length))
215 tmp = min_t(int, valuelen, buflen); 435 xfs_buf_zero(bp, byte_cnt,
216 xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); 436 BBTOB(bp->b_length) - byte_cnt);
217 if (tmp < buflen)
218 xfs_buf_zero(bp, tmp, buflen - tmp);
219 437
220 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 438 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
221 xfs_buf_relse(bp); 439 xfs_buf_relse(bp);
222 if (error) 440 if (error)
223 return error; 441 return error;
224 src += tmp; 442
225 valuelen -= tmp; 443 src += byte_cnt;
444 valuelen -= byte_cnt;
445 offset += byte_cnt;
446 hdrcnt--;
226 447
227 lblkno += map.br_blockcount; 448 lblkno += map.br_blockcount;
228 } 449 }
229 ASSERT(valuelen == 0); 450 ASSERT(valuelen == 0);
230 return(0); 451 ASSERT(hdrcnt == 0);
452 return 0;
231} 453}
232 454
233/* 455/*
@@ -306,7 +528,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
306 ASSERT(committed); 528 ASSERT(committed);
307 args->trans = NULL; 529 args->trans = NULL;
308 xfs_bmap_cancel(args->flist); 530 xfs_bmap_cancel(args->flist);
309 return(error); 531 return error;
310 } 532 }
311 533
312 /* 534 /*