diff options
Diffstat (limited to 'fs/xfs/xfs_dfrag.c')
-rw-r--r-- | fs/xfs/xfs_dfrag.c | 159 |
1 files changed, 127 insertions, 32 deletions
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index d1483a4f71b8..5bba29a07812 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -45,15 +45,21 @@ | |||
45 | #include "xfs_vnodeops.h" | 45 | #include "xfs_vnodeops.h" |
46 | #include "xfs_trace.h" | 46 | #include "xfs_trace.h" |
47 | 47 | ||
48 | |||
49 | static int xfs_swap_extents( | ||
50 | xfs_inode_t *ip, /* target inode */ | ||
51 | xfs_inode_t *tip, /* tmp inode */ | ||
52 | xfs_swapext_t *sxp); | ||
53 | |||
48 | /* | 54 | /* |
49 | * Syssgi interface for swapext | 55 | * ioctl interface for swapext |
50 | */ | 56 | */ |
51 | int | 57 | int |
52 | xfs_swapext( | 58 | xfs_swapext( |
53 | xfs_swapext_t *sxp) | 59 | xfs_swapext_t *sxp) |
54 | { | 60 | { |
55 | xfs_inode_t *ip, *tip; | 61 | xfs_inode_t *ip, *tip; |
56 | struct file *file, *target_file; | 62 | struct file *file, *tmp_file; |
57 | int error = 0; | 63 | int error = 0; |
58 | 64 | ||
59 | /* Pull information for the target fd */ | 65 | /* Pull information for the target fd */ |
@@ -68,56 +74,138 @@ xfs_swapext( | |||
68 | goto out_put_file; | 74 | goto out_put_file; |
69 | } | 75 | } |
70 | 76 | ||
71 | target_file = fget((int)sxp->sx_fdtmp); | 77 | tmp_file = fget((int)sxp->sx_fdtmp); |
72 | if (!target_file) { | 78 | if (!tmp_file) { |
73 | error = XFS_ERROR(EINVAL); | 79 | error = XFS_ERROR(EINVAL); |
74 | goto out_put_file; | 80 | goto out_put_file; |
75 | } | 81 | } |
76 | 82 | ||
77 | if (!(target_file->f_mode & FMODE_WRITE) || | 83 | if (!(tmp_file->f_mode & FMODE_WRITE) || |
78 | (target_file->f_flags & O_APPEND)) { | 84 | (tmp_file->f_flags & O_APPEND)) { |
79 | error = XFS_ERROR(EBADF); | 85 | error = XFS_ERROR(EBADF); |
80 | goto out_put_target_file; | 86 | goto out_put_tmp_file; |
81 | } | 87 | } |
82 | 88 | ||
83 | if (IS_SWAPFILE(file->f_path.dentry->d_inode) || | 89 | if (IS_SWAPFILE(file->f_path.dentry->d_inode) || |
84 | IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { | 90 | IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { |
85 | error = XFS_ERROR(EINVAL); | 91 | error = XFS_ERROR(EINVAL); |
86 | goto out_put_target_file; | 92 | goto out_put_tmp_file; |
87 | } | 93 | } |
88 | 94 | ||
89 | ip = XFS_I(file->f_path.dentry->d_inode); | 95 | ip = XFS_I(file->f_path.dentry->d_inode); |
90 | tip = XFS_I(target_file->f_path.dentry->d_inode); | 96 | tip = XFS_I(tmp_file->f_path.dentry->d_inode); |
91 | 97 | ||
92 | if (ip->i_mount != tip->i_mount) { | 98 | if (ip->i_mount != tip->i_mount) { |
93 | error = XFS_ERROR(EINVAL); | 99 | error = XFS_ERROR(EINVAL); |
94 | goto out_put_target_file; | 100 | goto out_put_tmp_file; |
95 | } | 101 | } |
96 | 102 | ||
97 | if (ip->i_ino == tip->i_ino) { | 103 | if (ip->i_ino == tip->i_ino) { |
98 | error = XFS_ERROR(EINVAL); | 104 | error = XFS_ERROR(EINVAL); |
99 | goto out_put_target_file; | 105 | goto out_put_tmp_file; |
100 | } | 106 | } |
101 | 107 | ||
102 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 108 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
103 | error = XFS_ERROR(EIO); | 109 | error = XFS_ERROR(EIO); |
104 | goto out_put_target_file; | 110 | goto out_put_tmp_file; |
105 | } | 111 | } |
106 | 112 | ||
107 | error = xfs_swap_extents(ip, tip, sxp); | 113 | error = xfs_swap_extents(ip, tip, sxp); |
108 | 114 | ||
109 | out_put_target_file: | 115 | out_put_tmp_file: |
110 | fput(target_file); | 116 | fput(tmp_file); |
111 | out_put_file: | 117 | out_put_file: |
112 | fput(file); | 118 | fput(file); |
113 | out: | 119 | out: |
114 | return error; | 120 | return error; |
115 | } | 121 | } |
116 | 122 | ||
117 | int | 123 | /* |
124 | * We need to check that the format of the data fork in the temporary inode is | ||
125 | * valid for the target inode before doing the swap. This is not a problem with | ||
126 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | ||
127 | * data fork depending on the space the attribute fork is taking so we can get | ||
128 | * invalid formats on the target inode. | ||
129 | * | ||
130 | * E.g. target has space for 7 extents in extent format, temp inode only has | ||
131 | * space for 6. If we defragment down to 7 extents, then the tmp format is a | ||
132 | * btree, but when swapped it needs to be in extent format. Hence we can't just | ||
133 | * blindly swap data forks on attr2 filesystems. | ||
134 | * | ||
135 | * Note that we check the swap in both directions so that we don't end up with | ||
136 | * a corrupt temporary inode, either. | ||
137 | * | ||
138 | * Note that fixing the way xfs_fsr sets up the attribute fork in the source | ||
139 | * inode will prevent this situation from occurring, so all we do here is | ||
140 | * reject and log the attempt. Basically we are putting the responsibility on | ||
141 | * userspace to get this right. | ||
142 | */ | ||
143 | static int | ||
144 | xfs_swap_extents_check_format( | ||
145 | xfs_inode_t *ip, /* target inode */ | ||
146 | xfs_inode_t *tip) /* tmp inode */ | ||
147 | { | ||
148 | |||
149 | /* Should never get a local format */ | ||
150 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | ||
151 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) | ||
152 | return EINVAL; | ||
153 | |||
154 | /* | ||
155 | * if the target inode has fewer extents than the temporary inode then | ||
156 | * why did userspace call us? | ||
157 | */ | ||
158 | if (ip->i_d.di_nextents < tip->i_d.di_nextents) | ||
159 | return EINVAL; | ||
160 | |||
161 | /* | ||
162 | * if the target inode is in extent form and the temp inode is in btree | ||
163 | * form then we will end up with the target inode in the wrong format | ||
164 | * as we already know there are fewer extents in the temp inode. | ||
165 | */ | ||
166 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
167 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) | ||
168 | return EINVAL; | ||
169 | |||
170 | /* Check temp in extent form to max in target */ | ||
171 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
172 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) | ||
173 | return EINVAL; | ||
174 | |||
175 | /* Check target in extent form to max in temp */ | ||
176 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
177 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) | ||
178 | return EINVAL; | ||
179 | |||
180 | /* | ||
181 | * If we are in a btree format, check that the temp root block will fit | ||
182 | * in the target and that it has enough extents to be in btree format | ||
183 | * in the target. | ||
184 | * | ||
185 | * Note that we have to be careful to allow btree->extent conversions | ||
186 | * (a common defrag case) which will occur when the temp inode is in | ||
187 | * extent format... | ||
188 | */ | ||
189 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && | ||
190 | ((XFS_IFORK_BOFF(ip) && | ||
191 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || | ||
192 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) | ||
193 | return EINVAL; | ||
194 | |||
195 | /* Reciprocal target->temp btree format checks */ | ||
196 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && | ||
197 | ((XFS_IFORK_BOFF(tip) && | ||
198 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || | ||
199 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) | ||
200 | return EINVAL; | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static int | ||
118 | xfs_swap_extents( | 206 | xfs_swap_extents( |
119 | xfs_inode_t *ip, | 207 | xfs_inode_t *ip, /* target inode */ |
120 | xfs_inode_t *tip, | 208 | xfs_inode_t *tip, /* tmp inode */ |
121 | xfs_swapext_t *sxp) | 209 | xfs_swapext_t *sxp) |
122 | { | 210 | { |
123 | xfs_mount_t *mp; | 211 | xfs_mount_t *mp; |
@@ -161,13 +249,6 @@ xfs_swap_extents( | |||
161 | goto out_unlock; | 249 | goto out_unlock; |
162 | } | 250 | } |
163 | 251 | ||
164 | /* Should never get a local format */ | ||
165 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | ||
166 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
167 | error = XFS_ERROR(EINVAL); | ||
168 | goto out_unlock; | ||
169 | } | ||
170 | |||
171 | if (VN_CACHED(VFS_I(tip)) != 0) { | 252 | if (VN_CACHED(VFS_I(tip)) != 0) { |
172 | error = xfs_flushinval_pages(tip, 0, -1, | 253 | error = xfs_flushinval_pages(tip, 0, -1, |
173 | FI_REMAPF_LOCKED); | 254 | FI_REMAPF_LOCKED); |
@@ -189,13 +270,15 @@ xfs_swap_extents( | |||
189 | goto out_unlock; | 270 | goto out_unlock; |
190 | } | 271 | } |
191 | 272 | ||
192 | /* | 273 | trace_xfs_swap_extent_before(ip, 0); |
193 | * If the target has extended attributes, the tmp file | 274 | trace_xfs_swap_extent_before(tip, 1); |
194 | * must also in order to ensure the correct data fork | 275 | |
195 | * format. | 276 | /* check inode formats now that data is flushed */ |
196 | */ | 277 | error = xfs_swap_extents_check_format(ip, tip); |
197 | if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { | 278 | if (error) { |
198 | error = XFS_ERROR(EINVAL); | 279 | xfs_fs_cmn_err(CE_NOTE, mp, |
280 | "%s: inode 0x%llx format is incompatible for exchanging.", | ||
281 | __FILE__, ip->i_ino); | ||
199 | goto out_unlock; | 282 | goto out_unlock; |
200 | } | 283 | } |
201 | 284 | ||
@@ -276,6 +359,16 @@ xfs_swap_extents( | |||
276 | *tifp = *tempifp; /* struct copy */ | 359 | *tifp = *tempifp; /* struct copy */ |
277 | 360 | ||
278 | /* | 361 | /* |
362 | * Fix the in-memory data fork values that are dependent on the fork | ||
363 | * offset in the inode. We can't assume they remain the same as attr2 | ||
364 | * has dynamic fork offsets. | ||
365 | */ | ||
366 | ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / | ||
367 | (uint)sizeof(xfs_bmbt_rec_t); | ||
368 | tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / | ||
369 | (uint)sizeof(xfs_bmbt_rec_t); | ||
370 | |||
371 | /* | ||
279 | * Fix the on-disk inode values | 372 | * Fix the on-disk inode values |
280 | */ | 373 | */ |
281 | tmp = (__uint64_t)ip->i_d.di_nblocks; | 374 | tmp = (__uint64_t)ip->i_d.di_nblocks; |
@@ -347,6 +440,8 @@ xfs_swap_extents( | |||
347 | 440 | ||
348 | error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); | 441 | error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); |
349 | 442 | ||
443 | trace_xfs_swap_extent_after(ip, 0); | ||
444 | trace_xfs_swap_extent_after(tip, 1); | ||
350 | out: | 445 | out: |
351 | kmem_free(tempifp); | 446 | kmem_free(tempifp); |
352 | return error; | 447 | return error; |