author		Amir Goldstein <amir73il@users.sf.net>	2011-06-27 19:40:50 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2011-06-27 19:40:50 -0400
commit		dae1e52cb1267bf8f52e5e47a80fab566d7e8aa4 (patch)
tree		2537e9f8f138e4935f88e605244174c9b2400bf4 /fs/ext4
parent		9f125d641beb898f5bf2fe69583192c18043517a (diff)
ext4: move ext4_ind_* functions from inode.c to indirect.c
This patch moves functions from inode.c to indirect.c.
The moved functions are ext4_ind_* functions and their helpers.
Functions called from inode.c are declared extern.
Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
 fs/ext4/Makefile         |    2 +-
 fs/ext4/block_validity.c |    1 +
 fs/ext4/ext4.h           |    9 +
 fs/ext4/indirect.c       | 1510 ++++++++++++++++++++++++++++++++++++++++
 fs/ext4/inode.c          | 1486 ----------------------------------------
 5 files changed, 1521 insertions(+), 1487 deletions(-)
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 04109460ba9e..56fd8f865930 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
-		mmp.o
+		mmp.o indirect.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index af103be491b0..8efb2f0a3447 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -266,3 +266,4 @@ int ext4_check_blockref(const char *function, unsigned int line,
 	}
 	return 0;
 }
+
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 82ba7eb7c4a5..ddaf5043fb38 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1834,6 +1834,15 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
+
+/* indirect.c */
+extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+				struct ext4_map_blocks *map, int flags);
+extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
+				const struct iovec *iov, loff_t offset,
+				unsigned long nr_segs);
+extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
+extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
 
 /* ioctl.c */
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
new file mode 100644
index 000000000000..c3e85a86e821
--- /dev/null
+++ b/fs/ext4/indirect.c
@@ -0,0 +1,1510 @@
+/*
+ *  linux/fs/ext4/indirect.c
+ *
+ *  from
+ *
+ *  linux/fs/ext4/inode.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/inode.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Goal-directed block allocation by Stephen Tweedie
+ *	(sct@redhat.com), 1993, 1998
+ */
+
+#include <linux/module.h>
+#include "ext4_jbd2.h"
+#include "truncate.h"
+
+#include <trace/events/ext4.h>
+
+typedef struct {
+	__le32	*p;
+	__le32	key;
+	struct buffer_head *bh;
+} Indirect;
+
+static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
+{
+	p->key = *(p->p = v);
+	p->bh = bh;
+}
+
+/**
+ *	ext4_block_to_path - parse the block number into array of offsets
+ *	@inode: inode in question (we are only interested in its superblock)
+ *	@i_block: block number to be parsed
+ *	@offsets: array to store the offsets in
+ *	@boundary: set this non-zero if the referred-to block is likely to be
+ *		followed (on disk) by an indirect block.
+ *
+ *	To store the locations of file's data ext4 uses a data structure common
+ *	for UNIX filesystems - tree of pointers anchored in the inode, with
+ *	data blocks at leaves and indirect blocks in intermediate nodes.
+ *	This function translates the block number into path in that tree -
+ *	return value is the path length and @offsets[n] is the offset of
+ *	pointer to (n+1)th node in the nth one. If @block is out of range
+ *	(negative or too large) warning is printed and zero returned.
+ *
+ *	Note: function doesn't find node addresses, so no IO is needed. All
+ *	we need to know is the capacity of indirect blocks (taken from the
+ *	inode->i_sb).
+ */
+
+/*
+ * Portability note: the last comparison (check that we fit into triple
+ * indirect block) is spelled differently, because otherwise on an
+ * architecture with 32-bit longs and 8Kb pages we might get into trouble
+ * if our filesystem had 8Kb blocks. We might use long long, but that would
+ * kill us on x86. Oh, well, at least the sign propagation does not matter -
+ * i_block would have to be negative in the very beginning, so we would not
+ * get there at all.
+ */
+
+static int ext4_block_to_path(struct inode *inode,
+			      ext4_lblk_t i_block,
+			      ext4_lblk_t offsets[4], int *boundary)
+{
+	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
+	const long direct_blocks = EXT4_NDIR_BLOCKS,
+		indirect_blocks = ptrs,
+		double_blocks = (1 << (ptrs_bits * 2));
+	int n = 0;
+	int final = 0;
+
+	if (i_block < direct_blocks) {
+		offsets[n++] = i_block;
+		final = direct_blocks;
+	} else if ((i_block -= direct_blocks) < indirect_blocks) {
+		offsets[n++] = EXT4_IND_BLOCK;
+		offsets[n++] = i_block;
+		final = ptrs;
+	} else if ((i_block -= indirect_blocks) < double_blocks) {
+		offsets[n++] = EXT4_DIND_BLOCK;
+		offsets[n++] = i_block >> ptrs_bits;
+		offsets[n++] = i_block & (ptrs - 1);
+		final = ptrs;
+	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
+		offsets[n++] = EXT4_TIND_BLOCK;
+		offsets[n++] = i_block >> (ptrs_bits * 2);
+		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
+		offsets[n++] = i_block & (ptrs - 1);
+		final = ptrs;
+	} else {
+		ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
+			     i_block + direct_blocks +
+			     indirect_blocks + double_blocks, inode->i_ino);
+	}
+	if (boundary)
+		*boundary = final - 1 - (i_block & (ptrs - 1));
+	return n;
+}
+
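To make the offset arithmetic above concrete, here is a minimal userspace model of the same mapping (an illustration only, not part of the patch; it assumes a 4KiB block size, so each indirect block holds 1024 four-byte pointers, and hard-codes the on-disk constants from ext4.h):

#include <stdio.h>

#define NDIR_BLOCKS	12	/* EXT4_NDIR_BLOCKS */
#define IND_BLOCK	12	/* EXT4_IND_BLOCK */
#define DIND_BLOCK	13	/* EXT4_DIND_BLOCK */
#define TIND_BLOCK	14	/* EXT4_TIND_BLOCK */
#define PTRS		1024	/* 4096-byte block / 4-byte pointer */
#define PTRS_BITS	10

static int block_to_path(long i_block, long offsets[4])
{
	int n = 0;

	if (i_block < NDIR_BLOCKS) {
		offsets[n++] = i_block;
	} else if ((i_block -= NDIR_BLOCKS) < PTRS) {
		offsets[n++] = IND_BLOCK;
		offsets[n++] = i_block;
	} else if ((i_block -= PTRS) < (1L << (PTRS_BITS * 2))) {
		offsets[n++] = DIND_BLOCK;
		offsets[n++] = i_block >> PTRS_BITS;
		offsets[n++] = i_block & (PTRS - 1);
	} else if (((i_block -= 1L << (PTRS_BITS * 2)) >> (PTRS_BITS * 2)) < PTRS) {
		offsets[n++] = TIND_BLOCK;
		offsets[n++] = i_block >> (PTRS_BITS * 2);
		offsets[n++] = (i_block >> PTRS_BITS) & (PTRS - 1);
		offsets[n++] = i_block & (PTRS - 1);
	}
	return n;			/* 0 means out of range */
}

int main(void)
{
	long offsets[4];
	int depth = block_to_path(5000, offsets);

	/* Logical block 5000 needs the double-indirect tree:
	 * path = { 13, 3, 892 }, depth = 3. */
	printf("depth %d: %ld %ld %ld\n",
	       depth, offsets[0], offsets[1], offsets[2]);
	return 0;
}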
+/**
+ *	ext4_get_branch - read the chain of indirect blocks leading to data
+ *	@inode: inode in question
+ *	@depth: depth of the chain (1 - direct pointer, etc.)
+ *	@offsets: offsets of pointers in inode/indirect blocks
+ *	@chain: place to store the result
+ *	@err: here we store the error value
+ *
+ *	Function fills the array of triples <key, p, bh> and returns %NULL
+ *	if everything went OK or the pointer to the last filled triple
+ *	(incomplete one) otherwise. Upon the return chain[i].key contains
+ *	the number of (i+1)-th block in the chain (as it is stored in memory,
+ *	i.e. little-endian 32-bit), chain[i].p contains the address of that
+ *	number (it points into struct inode for i==0 and into the bh->b_data
+ *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
+ *	block for i>0 and NULL for i==0. In other words, it holds the block
+ *	numbers of the chain, addresses they were taken from (and where we can
+ *	verify that chain did not change) and buffer_heads hosting these
+ *	numbers.
+ *
+ *	Function stops when it stumbles upon zero pointer (absent block)
+ *		(pointer to last triple returned, *@err == 0)
+ *	or when it gets an IO error reading an indirect block
+ *		(ditto, *@err == -EIO)
+ *	or when it reads all @depth-1 indirect blocks successfully and finds
+ *	the whole chain, all the way to the data (returns %NULL, *err == 0).
+ *
+ *	Needs to be called with
+ *	down_read(&EXT4_I(inode)->i_data_sem)
+ */
+static Indirect *ext4_get_branch(struct inode *inode, int depth,
+				 ext4_lblk_t *offsets,
+				 Indirect chain[4], int *err)
+{
+	struct super_block *sb = inode->i_sb;
+	Indirect *p = chain;
+	struct buffer_head *bh;
+
+	*err = 0;
+	/* i_data is not going away, no lock needed */
+	add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
+	if (!p->key)
+		goto no_block;
+	while (--depth) {
+		bh = sb_getblk(sb, le32_to_cpu(p->key));
+		if (unlikely(!bh))
+			goto failure;
+
+		if (!bh_uptodate_or_lock(bh)) {
+			if (bh_submit_read(bh) < 0) {
+				put_bh(bh);
+				goto failure;
+			}
+			/* validate block references */
+			if (ext4_check_indirect_blockref(inode, bh)) {
+				put_bh(bh);
+				goto failure;
+			}
+		}
+
+		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
+		/* Reader: end */
+		if (!p->key)
+			goto no_block;
+	}
+	return NULL;
+
+failure:
+	*err = -EIO;
+no_block:
+	return p;
+}
+
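The <key, p, bh> triples are easiest to picture with a toy two-level walk. This is a hedged userspace sketch; the "disk" array and block numbers are invented for illustration, and buffer_heads are left out:

#include <stdio.h>
#include <stdint.h>

/* Toy model: a "disk" of 16 blocks, each holding 4 pointers. */
static uint32_t disk[16][4];
static uint32_t i_data[15];	/* stands in for EXT4_I(inode)->i_data */

struct triple { uint32_t *p; uint32_t key; };

/* Walk the pointer chain like ext4_get_branch(), minus the I/O. */
static struct triple *get_branch(int depth, int *offsets,
				 struct triple chain[4])
{
	struct triple *q = chain;

	q->p = &i_data[*offsets];
	q->key = *q->p;
	if (!q->key)
		return q;		/* hole: stop at this triple */
	while (--depth) {
		uint32_t *blk = disk[q->key];	/* "read" the indirect block */

		++q; ++offsets;
		q->p = &blk[*offsets];
		q->key = *q->p;
		if (!q->key)
			return q;
	}
	return NULL;			/* whole chain present */
}

int main(void)
{
	struct triple chain[4];
	int offsets[2] = { 12, 3 };	/* EXT4_IND_BLOCK, then slot 3 */

	i_data[12] = 7;			/* indirect block lives at "block" 7 */
	disk[7][3] = 11;		/* ...whose slot 3 points at data block 11 */

	if (!get_branch(2, offsets, chain))
		printf("data block = %u\n", chain[1].key);	/* prints 11 */
	return 0;
}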
+/**
+ *	ext4_find_near - find a place for allocation with sufficient locality
+ *	@inode: owner
+ *	@ind: descriptor of indirect block.
+ *
+ *	This function returns the preferred place for block allocation.
+ *	It is used when heuristic for sequential allocation fails.
+ *	Rules are:
+ *	  + if there is a block to the left of our position - allocate near it.
+ *	  + if pointer will live in indirect block - allocate near that block.
+ *	  + if pointer will live in inode - allocate in the same
+ *	    cylinder group.
+ *
+ * In the latter case we colour the starting block by the caller's PID to
+ * prevent it from clashing with concurrent allocations for a different inode
+ * in the same block group.  The PID is used here so that functionally related
+ * files will be close-by on-disk.
+ *
+ *	Caller must make sure that @ind is valid and will stay that way.
+ */
+static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	__le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
+	__le32 *p;
+	ext4_fsblk_t bg_start;
+	ext4_fsblk_t last_block;
+	ext4_grpblk_t colour;
+	ext4_group_t block_group;
+	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
+
+	/* Try to find previous block */
+	for (p = ind->p - 1; p >= start; p--) {
+		if (*p)
+			return le32_to_cpu(*p);
+	}
+
+	/* No such thing, so let's try location of indirect block */
+	if (ind->bh)
+		return ind->bh->b_blocknr;
+
+	/*
+	 * It is going to be referred to from the inode itself? OK, just put it
+	 * into the same cylinder group then.
+	 */
+	block_group = ei->i_block_group;
+	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+		block_group &= ~(flex_size-1);
+		if (S_ISREG(inode->i_mode))
+			block_group++;
+	}
+	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
+	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+	/*
+	 * If we are doing delayed allocation, we don't need to take
+	 * colour into account.
+	 */
+	if (test_opt(inode->i_sb, DELALLOC))
+		return bg_start;
+
+	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+		colour = (current->pid % 16) *
+			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+	else
+		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
+	return bg_start + colour;
+}
+
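The colouring step is plain modular arithmetic. A quick hedged sketch with made-up values, assuming the common 32768 blocks per group:

#include <stdio.h>

int main(void)
{
	unsigned long blocks_per_group = 32768;	/* typical 4KiB-block fs */
	unsigned long bg_start = 65536;		/* first block of the group */
	long pid = 4242;			/* stand-in for current->pid */

	/* 16 evenly spaced "lanes" inside the group, picked by PID */
	unsigned long colour = (pid % 16) * (blocks_per_group / 16);

	/* pid 4242 -> lane 2 -> goal = 65536 + 2 * 2048 = 69632 */
	printf("goal = %lu\n", bg_start + colour);
	return 0;
}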
+/**
+ *	ext4_find_goal - find a preferred place for allocation.
+ *	@inode: owner
+ *	@block:  block we want
+ *	@partial: pointer to the last triple within a chain
+ *
+ *	Normally this function finds the preferred place for block allocation
+ *	and returns it.
+ *	Because this is only used for non-extent files, we limit the block nr
+ *	to 32 bits.
+ */
+static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
+				   Indirect *partial)
+{
+	ext4_fsblk_t goal;
+
+	/*
+	 * XXX need to get goal block from mballoc's data structures
+	 */
+
+	goal = ext4_find_near(inode, partial);
+	goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
+	return goal;
+}
+
+/**
+ *	ext4_blks_to_allocate - look up the block map and count the number
+ *	of direct blocks that need to be allocated for the given branch.
+ *
+ *	@branch: chain of indirect blocks
+ *	@k: number of blocks needed for indirect blocks
+ *	@blks: number of data blocks to be mapped.
+ *	@blocks_to_boundary:  the offset in the indirect block
+ *
+ *	return the total number of blocks to be allocated, including the
+ *	direct and indirect blocks.
+ */
+static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
+				 int blocks_to_boundary)
+{
+	unsigned int count = 0;
+
+	/*
+	 * Simple case: if the [t,d]indirect block(s) have not been allocated
+	 * yet, then it's clear that blocks on that path have not been
+	 * allocated either.
+	 */
+	if (k > 0) {
+		/* right now we don't handle cross boundary allocation */
+		if (blks < blocks_to_boundary + 1)
+			count += blks;
+		else
+			count += blocks_to_boundary + 1;
+		return count;
+	}
+
+	count++;
+	while (count < blks && count <= blocks_to_boundary &&
+		le32_to_cpu(*(branch[0].p + count)) == 0) {
+		count++;
+	}
+	return count;
+}
+
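The counting rule for the k == 0 case is easy to check by hand; a hedged sketch with made-up pointer contents:

#include <stdio.h>
#include <stdint.h>

/* Count mappable blocks like the k == 0 branch above: start with the
 * missing block we must allocate, then keep counting while the following
 * pointer slots are still holes (zero). */
static int blks_to_allocate(const uint32_t *p, unsigned int blks,
			    int blocks_to_boundary)
{
	int count = 1;

	while (count < (int)blks && count <= blocks_to_boundary &&
	       p[count] == 0)
		count++;
	return count;
}

int main(void)
{
	/* slot 0 is the missing block we must allocate; slots 1-2 are
	 * also holes; slot 3 is already mapped, so the run stops there. */
	uint32_t slots[] = { 0, 0, 0, 777 };

	printf("%d\n", blks_to_allocate(slots, 8, 6));	/* prints 3 */
	return 0;
}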
+/**
+ *	ext4_alloc_blocks: multiple allocate blocks needed for a branch
+ *	@handle: handle for this transaction
+ *	@inode: inode which needs allocated blocks
+ *	@iblock: the logical block to start allocating at
+ *	@goal: preferred physical block of allocation
+ *	@indirect_blks: the number of blocks needed to allocate for indirect
+ *			blocks
+ *	@blks: number of desired blocks
+ *	@new_blocks: on return it will store the new block numbers for
+ *	the indirect blocks(if needed) and the first direct block,
+ *	@err: on return it will store the error code
+ *
+ *	This function will return the number of blocks allocated as
+ *	requested by the passed-in parameters.
+ */
+static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
+			     ext4_lblk_t iblock, ext4_fsblk_t goal,
+			     int indirect_blks, int blks,
+			     ext4_fsblk_t new_blocks[4], int *err)
+{
+	struct ext4_allocation_request ar;
+	int target, i;
+	unsigned long count = 0, blk_allocated = 0;
+	int index = 0;
+	ext4_fsblk_t current_block = 0;
+	int ret = 0;
+
+	/*
+	 * Here we try to allocate the requested multiple blocks at once,
+	 * on a best-effort basis.
+	 * To build a branch, we should allocate blocks for
+	 * the indirect blocks (if not already allocated) and at least
+	 * the first direct block of this branch.  That's the
+	 * minimum number of blocks we need to allocate (required).
+	 */
+	/* first we try to allocate the indirect blocks */
+	target = indirect_blks;
+	while (target > 0) {
+		count = target;
+		/* allocating blocks for indirect blocks and direct blocks */
+		current_block = ext4_new_meta_blocks(handle, inode, goal,
+						     0, &count, err);
+		if (*err)
+			goto failed_out;
+
+		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
+			EXT4_ERROR_INODE(inode,
+					 "current_block %llu + count %lu > %d!",
+					 current_block, count,
+					 EXT4_MAX_BLOCK_FILE_PHYS);
+			*err = -EIO;
+			goto failed_out;
+		}
+
+		target -= count;
+		/* allocate blocks for indirect blocks */
+		while (index < indirect_blks && count) {
+			new_blocks[index++] = current_block++;
+			count--;
+		}
+		if (count > 0) {
+			/*
+			 * save the new block number
+			 * for the first direct block
+			 */
+			new_blocks[index] = current_block;
+			printk(KERN_INFO "%s returned more blocks than "
+						"requested\n", __func__);
+			WARN_ON(1);
+			break;
+		}
+	}
+
+	target = blks - count;
+	blk_allocated = count;
+	if (!target)
+		goto allocated;
+	/* Now allocate data blocks */
+	memset(&ar, 0, sizeof(ar));
+	ar.inode = inode;
+	ar.goal = goal;
+	ar.len = target;
+	ar.logical = iblock;
+	if (S_ISREG(inode->i_mode))
+		/* enable in-core preallocation only for regular files */
+		ar.flags = EXT4_MB_HINT_DATA;
+
+	current_block = ext4_mb_new_blocks(handle, &ar, err);
+	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
+		EXT4_ERROR_INODE(inode,
+				 "current_block %llu + ar.len %d > %d!",
+				 current_block, ar.len,
+				 EXT4_MAX_BLOCK_FILE_PHYS);
+		*err = -EIO;
+		goto failed_out;
+	}
+
+	if (*err && (target == blks)) {
+		/*
+		 * if the allocation failed and we didn't allocate
+		 * any blocks before
+		 */
+		goto failed_out;
+	}
+	if (!*err) {
+		if (target == blks) {
+			/*
+			 * save the new block number
+			 * for the first direct block
+			 */
+			new_blocks[index] = current_block;
+		}
+		blk_allocated += ar.len;
+	}
+allocated:
+	/* total number of blocks allocated for direct blocks */
+	ret = blk_allocated;
+	*err = 0;
+	return ret;
+failed_out:
+	for (i = 0; i < index; i++)
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
+	return ret;
+}
+
+/**
+ *	ext4_alloc_branch - allocate and set up a chain of blocks.
+ *	@handle: handle for this transaction
+ *	@inode: owner
+ *	@indirect_blks: number of allocated indirect blocks
+ *	@blks: number of allocated direct blocks
+ *	@goal: preferred place for allocation
+ *	@offsets: offsets (in the blocks) to store the pointers to next.
+ *	@branch: place to store the chain in.
+ *
+ *	This function allocates blocks, zeroes out all but the last one,
+ *	links them into chain and (if we are synchronous) writes them to disk.
+ *	In other words, it prepares a branch that can be spliced onto the
+ *	inode. It stores the information about that chain in the branch[], in
+ *	the same format as ext4_get_branch() would do. We are calling it after
+ *	we had read the existing part of chain and partial points to the last
+ *	triple of that (one with zero ->key). Upon the exit we have the same
+ *	picture as after the successful ext4_get_block(), except that in one
+ *	place chain is disconnected - *branch->p is still zero (we did not
+ *	set the last link), but branch->key contains the number that should
+ *	be placed into *branch->p to fill that gap.
+ *
+ *	If allocation fails we free all blocks we've allocated (and forget
+ *	their buffer_heads) and return the error value from the failed
+ *	ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+ *	as described above and return 0.
+ */
+static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+			     ext4_lblk_t iblock, int indirect_blks,
+			     int *blks, ext4_fsblk_t goal,
+			     ext4_lblk_t *offsets, Indirect *branch)
+{
+	int blocksize = inode->i_sb->s_blocksize;
+	int i, n = 0;
+	int err = 0;
+	struct buffer_head *bh;
+	int num;
+	ext4_fsblk_t new_blocks[4];
+	ext4_fsblk_t current_block;
+
+	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
+				*blks, new_blocks, &err);
+	if (err)
+		return err;
+
+	branch[0].key = cpu_to_le32(new_blocks[0]);
+	/*
+	 * metadata blocks and data blocks are allocated.
+	 */
+	for (n = 1; n <= indirect_blks; n++) {
+		/*
+		 * Get buffer_head for parent block, zero it out
+		 * and set the pointer to new one, then send
+		 * parent to disk.
+		 */
+		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (unlikely(!bh)) {
+			err = -EIO;
+			goto failed;
+		}
+
+		branch[n].bh = bh;
+		lock_buffer(bh);
+		BUFFER_TRACE(bh, "call get_create_access");
+		err = ext4_journal_get_create_access(handle, bh);
+		if (err) {
+			/* Don't brelse(bh) here; it's done in
+			 * ext4_journal_forget() below */
+			unlock_buffer(bh);
+			goto failed;
+		}
+
+		memset(bh->b_data, 0, blocksize);
+		branch[n].p = (__le32 *) bh->b_data + offsets[n];
+		branch[n].key = cpu_to_le32(new_blocks[n]);
+		*branch[n].p = branch[n].key;
+		if (n == indirect_blks) {
+			current_block = new_blocks[n];
+			/*
+			 * End of chain, update the last new metablock of
+			 * the chain to point to the newly allocated
+			 * data block numbers
+			 */
+			for (i = 1; i < num; i++)
+				*(branch[n].p + i) = cpu_to_le32(++current_block);
+		}
+		BUFFER_TRACE(bh, "marking uptodate");
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+
+		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, inode, bh);
+		if (err)
+			goto failed;
+	}
+	*blks = num;
+	return err;
+failed:
+	/* Allocation failed, free what we already allocated */
+	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
+	for (i = 1; i <= n ; i++) {
+		/*
+		 * branch[i].bh is newly allocated, so there is no
+		 * need to revoke the block, which is why we don't
+		 * need to set EXT4_FREE_BLOCKS_METADATA.
+		 */
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
+				 EXT4_FREE_BLOCKS_FORGET);
+	}
+	for (i = n+1; i < indirect_blks; i++)
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
+
+	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
+
+	return err;
+}
+
+/**
+ * ext4_splice_branch - splice the allocated branch onto inode.
+ * @handle: handle for this transaction
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ *	ext4_alloc_branch)
+ * @where: location of missing link
+ * @num:   number of indirect blocks we are adding
+ * @blks:  number of direct blocks we are adding
+ *
+ * This function fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0.
+ */
+static int ext4_splice_branch(handle_t *handle, struct inode *inode,
+			      ext4_lblk_t block, Indirect *where, int num,
+			      int blks)
+{
+	int i;
+	int err = 0;
+	ext4_fsblk_t current_block;
+
+	/*
+	 * If we're splicing into a [td]indirect block (as opposed to the
+	 * inode) then we need to get write access to the [td]indirect block
+	 * before the splice.
+	 */
+	if (where->bh) {
+		BUFFER_TRACE(where->bh, "get_write_access");
+		err = ext4_journal_get_write_access(handle, where->bh);
+		if (err)
+			goto err_out;
+	}
+	/* That's it */
+
+	*where->p = where->key;
+
+	/*
+	 * Update the host buffer_head or inode to point to the just-allocated
+	 * direct blocks
+	 */
+	if (num == 0 && blks > 1) {
+		current_block = le32_to_cpu(where->key) + 1;
+		for (i = 1; i < blks; i++)
+			*(where->p + i) = cpu_to_le32(current_block++);
+	}
+
+	/* We are done with atomic stuff, now do the rest of housekeeping */
+	/* had we spliced it onto indirect block? */
+	if (where->bh) {
+		/*
+		 * If we spliced it onto an indirect block, we haven't
+		 * altered the inode.  Note however that if it is being spliced
+		 * onto an indirect block at the very end of the file (the
+		 * file is growing) then we *will* alter the inode to reflect
+		 * the new i_size.  But that is not done here - it is done in
+		 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
+		 */
+		jbd_debug(5, "splicing indirect only\n");
+		BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+		if (err)
+			goto err_out;
+	} else {
+		/*
+		 * OK, we spliced it into the inode itself on a direct block.
+		 */
+		ext4_mark_inode_dirty(handle, inode);
+		jbd_debug(5, "splicing direct\n");
+	}
+	return err;
+
+err_out:
+	for (i = 1; i <= num; i++) {
+		/*
+		 * branch[i].bh is newly allocated, so there is no
+		 * need to revoke the block, which is why we don't
+		 * need to set EXT4_FREE_BLOCKS_METADATA.
+		 */
+		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+				 EXT4_FREE_BLOCKS_FORGET);
+	}
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
+			 blks, 0);
+
+	return err;
+}
+
+/*
+ * The ext4_ind_map_blocks() function handles non-extents inodes
+ * (i.e., using the traditional indirect/double-indirect i_blocks
+ * scheme) for ext4_map_blocks().
+ *
+ * Allocation strategy is simple: if we have to allocate something, we will
+ * have to go the whole way to leaf. So let's do it before attaching anything
+ * to tree, set linkage between the newborn blocks, write them if sync is
+ * required, recheck the path, free and repeat if check fails, otherwise
+ * set the last missing link (that will protect us from any truncate-generated
+ * removals - all blocks on the path are immune now) and possibly force the
+ * write on the parent block.
+ * That has a nice additional property: no special recovery from the failed
+ * allocations is needed - we simply release blocks and do not touch anything
+ * reachable from inode.
+ *
+ * `handle' can be NULL if create == 0.
+ *
+ * return > 0, # of blocks mapped or allocated.
+ * return = 0, if plain lookup failed.
+ * return < 0, error case.
+ *
+ * The ext4_ind_map_blocks() function should be called with
+ * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
+ * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
+ * blocks.
+ */
+int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+			struct ext4_map_blocks *map,
+			int flags)
+{
+	int err = -EIO;
+	ext4_lblk_t offsets[4];
+	Indirect chain[4];
+	Indirect *partial;
+	ext4_fsblk_t goal;
+	int indirect_blks;
+	int blocks_to_boundary = 0;
+	int depth;
+	int count = 0;
+	ext4_fsblk_t first_block = 0;
+
+	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
+	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
+	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
+	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
+				   &blocks_to_boundary);
+
+	if (depth == 0)
+		goto out;
+
+	partial = ext4_get_branch(inode, depth, offsets, chain, &err);
+
+	/* Simplest case - block found, no allocation needed */
+	if (!partial) {
+		first_block = le32_to_cpu(chain[depth - 1].key);
+		count++;
+		/* map more blocks */
+		while (count < map->m_len && count <= blocks_to_boundary) {
+			ext4_fsblk_t blk;
+
+			blk = le32_to_cpu(*(chain[depth-1].p + count));
+
+			if (blk == first_block + count)
+				count++;
+			else
+				break;
+		}
+		goto got_it;
+	}
+
+	/* Next simple case - plain lookup or failed read of indirect block */
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+		goto cleanup;
+
+	/*
+	 * Okay, we need to do block allocation.
+	 */
+	goal = ext4_find_goal(inode, map->m_lblk, partial);
+
+	/* the number of blocks needed to allocate for [d,t]indirect blocks */
+	indirect_blks = (chain + depth) - partial - 1;
+
+	/*
+	 * Next look up the indirect map to count the total number of
+	 * direct blocks to allocate for this branch.
+	 */
+	count = ext4_blks_to_allocate(partial, indirect_blks,
+				      map->m_len, blocks_to_boundary);
+	/*
+	 * Block out ext4_truncate while we alter the tree
+	 */
+	err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
+				&count, goal,
+				offsets + (partial - chain), partial);
+
+	/*
+	 * The ext4_splice_branch call will free and forget any buffers
+	 * on the new chain if there is a failure, but that risks using
+	 * up transaction credits, especially for bitmaps where the
+	 * credits cannot be returned.  Can we handle this somehow?  We
+	 * may need to return -EAGAIN upwards in the worst case. --sct
+	 */
+	if (!err)
+		err = ext4_splice_branch(handle, inode, map->m_lblk,
+					 partial, indirect_blks, count);
+	if (err)
+		goto cleanup;
+
+	map->m_flags |= EXT4_MAP_NEW;
+
+	ext4_update_inode_fsync_trans(handle, inode, 1);
+got_it:
+	map->m_flags |= EXT4_MAP_MAPPED;
+	map->m_pblk = le32_to_cpu(chain[depth-1].key);
+	map->m_len = count;
+	if (count > blocks_to_boundary)
+		map->m_flags |= EXT4_MAP_BOUNDARY;
+	err = count;
+	/* Clean up and exit */
+	partial = chain + depth - 1;	/* the whole chain */
+cleanup:
+	while (partial > chain) {
+		BUFFER_TRACE(partial->bh, "call brelse");
+		brelse(partial->bh);
+		partial--;
+	}
+out:
+	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+				       map->m_pblk, map->m_len, err);
+	return err;
+}
+
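The three-way return convention documented above is worth pinning down with a hedged sketch of how a caller would branch on it (the result values below are invented for illustration; -5 stands for -EIO on Linux):

#include <stdio.h>

int main(void)
{
	int results[] = { 3, 0, -5 };	/* hypothetical return values */
	unsigned int requested = 8;

	for (int i = 0; i < 3; i++) {
		int ret = results[i];

		if (ret > 0)		/* blocks mapped or allocated */
			printf("mapped %d of %u blocks\n", ret, requested);
		else if (ret == 0)	/* plain lookup found a hole */
			printf("hole: nothing mapped\n");
		else			/* error, e.g. -EIO */
			printf("error %d\n", ret);
	}
	return 0;
}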
+/*
+ * O_DIRECT for ext3 (or indirect map) based files
+ *
+ * If the O_DIRECT write will extend the file then add this inode to the
+ * orphan list.  So recovery will truncate it back to the original size
+ * if the machine crashes during the write.
+ *
+ * If the O_DIRECT write is instantiating holes inside i_size and the machine
+ * crashes then stale disk data _may_ be exposed inside the file. But current
+ * VFS code falls back into buffered path in that case so we are safe.
+ */
+ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
+			   const struct iovec *iov, loff_t offset,
+			   unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	handle_t *handle;
+	ssize_t ret;
+	int orphan = 0;
+	size_t count = iov_length(iov, nr_segs);
+	int retries = 0;
+
+	if (rw == WRITE) {
+		loff_t final_size = offset + count;
+
+		if (final_size > inode->i_size) {
+			/* Credits for sb + inode write */
+			handle = ext4_journal_start(inode, 2);
+			if (IS_ERR(handle)) {
+				ret = PTR_ERR(handle);
+				goto out;
+			}
+			ret = ext4_orphan_add(handle, inode);
+			if (ret) {
+				ext4_journal_stop(handle);
+				goto out;
+			}
+			orphan = 1;
+			ei->i_disksize = inode->i_size;
+			ext4_journal_stop(handle);
+		}
+	}
+
+retry:
+	if (rw == READ && ext4_should_dioread_nolock(inode))
+		ret = __blockdev_direct_IO(rw, iocb, inode,
+				 inode->i_sb->s_bdev, iov,
+				 offset, nr_segs,
+				 ext4_get_block, NULL, NULL, 0);
+	else {
+		ret = blockdev_direct_IO(rw, iocb, inode,
+				 inode->i_sb->s_bdev, iov,
+				 offset, nr_segs,
+				 ext4_get_block, NULL);
+
+		if (unlikely((rw & WRITE) && ret < 0)) {
+			loff_t isize = i_size_read(inode);
+			loff_t end = offset + iov_length(iov, nr_segs);
+
+			if (end > isize)
+				ext4_truncate_failed_write(inode);
+		}
+	}
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+
+	if (orphan) {
+		int err;
+
+		/* Credits for sb + inode write */
+		handle = ext4_journal_start(inode, 2);
+		if (IS_ERR(handle)) {
+			/* This is really bad luck. We've written the data
+			 * but cannot extend i_size. Bail out and pretend
+			 * the write failed... */
+			ret = PTR_ERR(handle);
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+
+			goto out;
+		}
+		if (inode->i_nlink)
+			ext4_orphan_del(handle, inode);
+		if (ret > 0) {
+			loff_t end = offset + ret;
+			if (end > inode->i_size) {
+				ei->i_disksize = end;
+				i_size_write(inode, end);
+				/*
+				 * We're going to return a positive `ret'
+				 * here due to non-zero-length I/O, so there's
+				 * no way of reporting error returns from
+				 * ext4_mark_inode_dirty() to userspace.  So
+				 * ignore it.
+				 */
+				ext4_mark_inode_dirty(handle, inode);
+			}
+		}
+		err = ext4_journal_stop(handle);
+		if (ret == 0)
+			ret = err;
+	}
+out:
+	return ret;
+}
+
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * to allocate a new block at @lblock for a non-extent file.
+ */
+int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
+	int blk_bits;
+
+	if (lblock < EXT4_NDIR_BLOCKS)
+		return 0;
+
+	lblock -= EXT4_NDIR_BLOCKS;
+
+	if (ei->i_da_metadata_calc_len &&
+	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+		ei->i_da_metadata_calc_len++;
+		return 0;
+	}
+	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+	ei->i_da_metadata_calc_len = 1;
+	blk_bits = order_base_2(lblock);
+	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
+}
+
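The return value approximates tree depth from the block's magnitude. A hedged worked example, assuming 4KiB blocks (so EXT4_ADDR_PER_BLOCK_BITS is 10) and a stand-in for the kernel's order_base_2():

#include <stdio.h>

/* Minimal stand-in for order_base_2(): smallest k with 2^k >= n, n > 1. */
static int order_base_2(unsigned long n)
{
	int k = 0;

	while ((1UL << k) < n)
		k++;
	return k;
}

int main(void)
{
	unsigned long lblock = 20012;	/* logical block in the file */
	int addr_per_block_bits = 10;	/* 4KiB block / 4-byte pointers */

	lblock -= 12;			/* skip the direct blocks */

	/* order_base_2(20000) == 15, so 15/10 + 1 == 2: reserve one
	 * indirect and one double-indirect block for this area. */
	printf("%d metadata blocks\n",
	       order_base_2(lblock) / addr_per_block_bits + 1);
	return 0;
}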
+int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+	int indirects;
+
+	/* if nrblocks are contiguous */
+	if (chunk) {
+		/*
+		 * With N contiguous data blocks, we need at most
+		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+		 * 2 dindirect blocks, and 1 tindirect block
+		 */
+		return DIV_ROUND_UP(nrblocks,
+				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
+	}
+	/*
+	 * if nrblocks are not contiguous, worst case, each block touches
+	 * an indirect block, and each indirect block touches a double
+	 * indirect block, plus a triple indirect block
+	 */
+	indirects = nrblocks * 2 + 1;
+	return indirects;
+}
+
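Both formulas are quick to sanity-check; a standalone arithmetic sketch (4KiB blocks assumed, so 1024 pointers per indirect block):

#include <stdio.h>

#define ADDR_PER_BLOCK	1024	/* pointers per 4KiB indirect block */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static int ind_trans_blocks(int nrblocks, int chunk)
{
	if (chunk)	/* contiguous: N/1024 + 1 indirect, 2 dind, 1 tind */
		return DIV_ROUND_UP(nrblocks, ADDR_PER_BLOCK) + 4;
	return nrblocks * 2 + 1;	/* scattered worst case */
}

int main(void)
{
	printf("%d\n", ind_trans_blocks(1024, 1));	/* 1 + 4 = 5 */
	printf("%d\n", ind_trans_blocks(16, 0));	/* 16*2 + 1 = 33 */
	return 0;
}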
+/*
+ * Truncate transactions can be complex and absolutely huge.  So we need to
+ * be able to restart the transaction at a convenient checkpoint to make
+ * sure we don't overflow the journal.
+ *
+ * start_transaction gets us a new handle for a truncate transaction,
+ * and extend_transaction tries to extend the existing one a bit.  If
+ * extend fails, we need to propagate the failure up and restart the
+ * transaction in the top-level truncate loop. --sct
+ */
+static handle_t *start_transaction(struct inode *inode)
+{
+	handle_t *result;
+
+	result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
+	if (!IS_ERR(result))
+		return result;
+
+	ext4_std_error(inode->i_sb, PTR_ERR(result));
+	return result;
+}
+
+/*
+ * Try to extend this transaction for the purposes of truncation.
+ *
+ * Returns 0 if we managed to create more room.  If we can't create more
+ * room, and the transaction must be restarted we return 1.
+ */
+static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+{
+	if (!ext4_handle_valid(handle))
+		return 0;
+	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
+		return 0;
+	if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
+		return 0;
+	return 1;
+}
+
+/*
+ * Probably it should be a library function... search for first non-zero word
+ * or memcmp with zero_page, whatever is better for particular architecture.
+ * Linus?
+ */
+static inline int all_zeroes(__le32 *p, __le32 *q)
+{
+	while (p < q)
+		if (*p++)
+			return 0;
+	return 1;
+}
+
+/**
+ *	ext4_find_shared - find the indirect blocks for partial truncation.
+ *	@inode:	  inode in question
+ *	@depth:	  depth of the affected branch
+ *	@offsets: offsets of pointers in that branch (see ext4_block_to_path)
+ *	@chain:	  place to store the pointers to partial indirect blocks
+ *	@top:	  place to the (detached) top of branch
+ *
+ *	This is a helper function used by ext4_truncate().
+ *
+ *	When we do truncate() we may have to clean the ends of several
+ *	indirect blocks but leave the blocks themselves alive. Block is
+ *	partially truncated if some data below the new i_size is referred
+ *	from it (and it is on the path to the first completely truncated
+ *	data block, indeed).  We have to free the top of that path along
+ *	with everything to the right of the path. Since no allocation
+ *	past the truncation point is possible until ext4_truncate()
+ *	finishes, we may safely do the latter, but top of branch may
+ *	require special attention - pageout below the truncation point
+ *	might try to populate it.
+ *
+ *	We atomically detach the top of branch from the tree, store the
+ *	block number of its root in *@top, pointers to buffer_heads of
+ *	partially truncated blocks - in @chain[].bh and pointers to
+ *	their last elements that should not be removed - in
+ *	@chain[].p. Return value is the pointer to last filled element
+ *	of @chain.
+ *
+ *	The actual freeing of the subtrees is left to the caller:
+ *	a) free the subtree starting from *@top
+ *	b) free the subtrees whose roots are stored in
+ *		(@chain[i].p+1 .. end of @chain[i].bh->b_data)
+ *	c) free the subtrees growing from the inode past the @chain[0].
+ *		(no partially truncated stuff there).
+ */
+
+static Indirect *ext4_find_shared(struct inode *inode, int depth,
+				  ext4_lblk_t offsets[4], Indirect chain[4],
+				  __le32 *top)
+{
+	Indirect *partial, *p;
+	int k, err;
+
+	*top = 0;
+	/* Make k index the deepest non-null offset + 1 */
+	for (k = depth; k > 1 && !offsets[k-1]; k--)
+		;
+	partial = ext4_get_branch(inode, k, offsets, chain, &err);
+	/* Writer: pointers */
+	if (!partial)
+		partial = chain + k-1;
+	/*
+	 * If the branch acquired continuation since we've looked at it -
+	 * fine, it should all survive and (new) top doesn't belong to us.
+	 */
+	if (!partial->key && *partial->p)
+		/* Writer: end */
+		goto no_top;
+	for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
+		;
+	/*
+	 * OK, we've found the last block that must survive. The rest of our
+	 * branch should be detached before unlocking. However, if that rest
+	 * of branch is all ours and does not grow immediately from the inode
+	 * it's easier to cheat and just decrement partial->p.
+	 */
+	if (p == chain + k - 1 && p > chain) {
+		p->p--;
+	} else {
+		*top = *p->p;
+		/* Nope, don't do this in ext4.  Must leave the tree intact */
+#if 0
+		*p->p = 0;
+#endif
+	}
+	/* Writer: end */
+
+	while (partial > p) {
+		brelse(partial->bh);
+		partial--;
+	}
+no_top:
+	return partial;
+}
+
+/*
+ * Zero a number of block pointers in either an inode or an indirect block.
+ * If we restart the transaction we must again get write access to the
+ * indirect block for further modification.
+ *
+ * We release `count' blocks on disk, but (last - first) may be greater
+ * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
+ */
+static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
+			     struct buffer_head *bh,
+			     ext4_fsblk_t block_to_free,
+			     unsigned long count, __le32 *first,
+			     __le32 *last)
+{
+	__le32 *p;
+	int	flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
+	int	err;
+
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		flags |= EXT4_FREE_BLOCKS_METADATA;
+
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
+				   count)) {
+		EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+				 "blocks %llu len %lu",
+				 (unsigned long long) block_to_free, count);
+		return 1;
+	}
+
+	if (try_to_extend_transaction(handle, inode)) {
+		if (bh) {
+			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+			err = ext4_handle_dirty_metadata(handle, inode, bh);
+			if (unlikely(err))
+				goto out_err;
+		}
+		err = ext4_mark_inode_dirty(handle, inode);
+		if (unlikely(err))
+			goto out_err;
+		err = ext4_truncate_restart_trans(handle, inode,
+					ext4_blocks_for_truncate(inode));
+		if (unlikely(err))
+			goto out_err;
+		if (bh) {
+			BUFFER_TRACE(bh, "retaking write access");
+			err = ext4_journal_get_write_access(handle, bh);
+			if (unlikely(err))
+				goto out_err;
+		}
+	}
+
+	for (p = first; p < last; p++)
+		*p = 0;
+
+	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
+	return 0;
+out_err:
+	ext4_std_error(inode->i_sb, err);
+	return err;
+}
+
+/**
+ * ext4_free_data - free a list of data blocks
+ * @handle:	handle for this transaction
+ * @inode:	inode we are dealing with
+ * @this_bh:	indirect buffer_head which contains *@first and *@last
+ * @first:	array of block numbers
+ * @last:	points immediately past the end of array
+ *
+ * We are freeing all blocks referred from that array (numbers are stored as
+ * little-endian 32-bit) and updating @inode->i_blocks appropriately.
+ *
+ * We accumulate contiguous runs of blocks to free.  Conveniently, if these
+ * blocks are contiguous then releasing them at one time will only affect one
+ * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
+ * actually use a lot of journal space.
+ *
+ * @this_bh will be %NULL if @first and @last point into the inode's direct
+ * block pointers.
+ */
+static void ext4_free_data(handle_t *handle, struct inode *inode,
+			   struct buffer_head *this_bh,
+			   __le32 *first, __le32 *last)
+{
+	ext4_fsblk_t block_to_free = 0;    /* Starting block # of a run */
+	unsigned long count = 0;	   /* Number of blocks in the run */
+	__le32 *block_to_free_p = NULL;    /* Pointer into inode/ind
+					      corresponding to
+					      block_to_free */
+	ext4_fsblk_t nr;		   /* Current block # */
+	__le32 *p;			   /* Pointer into inode/ind
+					      for current block */
+	int err = 0;
+
+	if (this_bh) {				/* For indirect block */
+		BUFFER_TRACE(this_bh, "get_write_access");
+		err = ext4_journal_get_write_access(handle, this_bh);
+		/* Important: if we can't update the indirect pointers
+		 * to the blocks, we can't free them. */
+		if (err)
+			return;
+	}
+
+	for (p = first; p < last; p++) {
+		nr = le32_to_cpu(*p);
+		if (nr) {
+			/* accumulate blocks to free if they're contiguous */
+			if (count == 0) {
+				block_to_free = nr;
+				block_to_free_p = p;
+				count = 1;
+			} else if (nr == block_to_free + count) {
+				count++;
+			} else {
+				err = ext4_clear_blocks(handle, inode, this_bh,
+							block_to_free, count,
+							block_to_free_p, p);
+				if (err)
+					break;
+				block_to_free = nr;
+				block_to_free_p = p;
+				count = 1;
+			}
+		}
+	}
+
+	if (!err && count > 0)
+		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+					count, block_to_free_p, p);
+	if (err < 0)
+		/* fatal error */
+		return;
+
+	if (this_bh) {
+		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
+
+		/*
+		 * The buffer head should have an attached journal head at this
+		 * point. However, if the data is corrupted and an indirect
+		 * block pointed to itself, it would have been detached when
+		 * the block was cleared. Check for this instead of OOPSing.
+		 */
+		if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
+			ext4_handle_dirty_metadata(handle, inode, this_bh);
+		else
+			EXT4_ERROR_INODE(inode,
+					 "circular indirect block detected at "
+					 "block %llu",
+				(unsigned long long) this_bh->b_blocknr);
+	}
+}
+
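The run accumulation is the heart of this function; a hedged userspace model of just that grouping logic, with invented block numbers:

#include <stdio.h>
#include <stdint.h>

/* Group a pointer array into contiguous runs, skipping holes, the way
 * ext4_free_data() batches its ext4_clear_blocks() calls. */
static void free_runs(const uint32_t *first, const uint32_t *last)
{
	uint32_t block_to_free = 0;
	unsigned long count = 0;

	for (const uint32_t *p = first; p < last; p++) {
		uint32_t nr = *p;

		if (!nr)
			continue;		/* a hole */
		if (count == 0) {
			block_to_free = nr;
			count = 1;
		} else if (nr == block_to_free + count) {
			count++;		/* extends the current run */
		} else {
			printf("free %u..%u\n", block_to_free,
			       (uint32_t)(block_to_free + count - 1));
			block_to_free = nr;
			count = 1;
		}
	}
	if (count)
		printf("free %u..%u\n", block_to_free,
		       (uint32_t)(block_to_free + count - 1));
}

int main(void)
{
	/* 100..102 are contiguous, 0 is a hole, 200 stands alone:
	 * prints "free 100..102" then "free 200..200". */
	uint32_t blocks[] = { 100, 101, 102, 0, 200 };

	free_runs(blocks, blocks + 5);
	return 0;
}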
1231 | /** | ||
1232 | * ext4_free_branches - free an array of branches | ||
1233 | * @handle: JBD handle for this transaction | ||
1234 | * @inode: inode we are dealing with | ||
1235 | * @parent_bh: the buffer_head which contains *@first and *@last | ||
1236 | * @first: array of block numbers | ||
1237 | * @last: pointer immediately past the end of array | ||
1238 | * @depth: depth of the branches to free | ||
1239 | * | ||
1240 | * We are freeing all blocks referred from these branches (numbers are | ||
1241 | * stored as little-endian 32-bit) and updating @inode->i_blocks | ||
1242 | * appropriately. | ||
1243 | */ | ||
1244 | static void ext4_free_branches(handle_t *handle, struct inode *inode, | ||
1245 | struct buffer_head *parent_bh, | ||
1246 | __le32 *first, __le32 *last, int depth) | ||
1247 | { | ||
1248 | ext4_fsblk_t nr; | ||
1249 | __le32 *p; | ||
1250 | |||
1251 | if (ext4_handle_is_aborted(handle)) | ||
1252 | return; | ||
1253 | |||
1254 | if (depth--) { | ||
1255 | struct buffer_head *bh; | ||
1256 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
1257 | p = last; | ||
1258 | while (--p >= first) { | ||
1259 | nr = le32_to_cpu(*p); | ||
1260 | if (!nr) | ||
1261 | continue; /* A hole */ | ||
1262 | |||
1263 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
1264 | nr, 1)) { | ||
1265 | EXT4_ERROR_INODE(inode, | ||
1266 | "invalid indirect mapped " | ||
1267 | "block %lu (level %d)", | ||
1268 | (unsigned long) nr, depth); | ||
1269 | break; | ||
1270 | } | ||
1271 | |||
1272 | /* Go read the buffer for the next level down */ | ||
1273 | bh = sb_bread(inode->i_sb, nr); | ||
1274 | |||
1275 | /* | ||
1276 | * A read failure? Report error and clear slot | ||
1277 | * (should be rare). | ||
1278 | */ | ||
1279 | if (!bh) { | ||
1280 | EXT4_ERROR_INODE_BLOCK(inode, nr, | ||
1281 | "Read failure"); | ||
1282 | continue; | ||
1283 | } | ||
1284 | |||
1285 | /* This zaps the entire block. Bottom up. */ | ||
1286 | BUFFER_TRACE(bh, "free child branches"); | ||
1287 | ext4_free_branches(handle, inode, bh, | ||
1288 | (__le32 *) bh->b_data, | ||
1289 | (__le32 *) bh->b_data + addr_per_block, | ||
1290 | depth); | ||
1291 | brelse(bh); | ||
1292 | |||
1293 | /* | ||
1294 | * Everything below this this pointer has been | ||
1295 | * released. Now let this top-of-subtree go. | ||
1296 | * | ||
1297 | * We want the freeing of this indirect block to be | ||
1298 | * atomic in the journal with the updating of the | ||
1299 | * bitmap block which owns it. So make some room in | ||
1300 | * the journal. | ||
1301 | * | ||
1302 | * We zero the parent pointer *after* freeing its | ||
1303 | * pointee in the bitmaps, so if extend_transaction() | ||
1304 | * for some reason fails to put the bitmap changes and | ||
1305 | * the release into the same transaction, recovery | ||
1306 | * will merely complain about releasing a free block, | ||
1307 | * rather than leaking blocks. | ||
1308 | */ | ||
1309 | if (ext4_handle_is_aborted(handle)) | ||
1310 | return; | ||
1311 | if (try_to_extend_transaction(handle, inode)) { | ||
1312 | ext4_mark_inode_dirty(handle, inode); | ||
1313 | ext4_truncate_restart_trans(handle, inode, | ||
1314 | ext4_blocks_for_truncate(inode)); | ||
1315 | } | ||
1316 | |||
1317 | /* | ||
1318 | * The forget flag here is critical because if | ||
1319 | * we are journaling (and not doing data | ||
1320 | * journaling), we have to make sure a revoke | ||
1321 | * record is written to prevent the journal | ||
1322 | * replay from overwriting the (former) | ||
1323 | * indirect block if it gets reallocated as a | ||
1324 | * data block. This must happen in the same | ||
1325 | * transaction where the data blocks are | ||
1326 | * actually freed. | ||
1327 | */ | ||
1328 | ext4_free_blocks(handle, inode, NULL, nr, 1, | ||
1329 | EXT4_FREE_BLOCKS_METADATA| | ||
1330 | EXT4_FREE_BLOCKS_FORGET); | ||
1331 | |||
1332 | if (parent_bh) { | ||
1333 | /* | ||
1334 | * The block which we have just freed is | ||
1335 | * pointed to by an indirect block: journal it | ||
1336 | */ | ||
1337 | BUFFER_TRACE(parent_bh, "get_write_access"); | ||
1338 | if (!ext4_journal_get_write_access(handle, | ||
1339 | parent_bh)){ | ||
1340 | *p = 0; | ||
1341 | BUFFER_TRACE(parent_bh, | ||
1342 | "call ext4_handle_dirty_metadata"); | ||
1343 | ext4_handle_dirty_metadata(handle, | ||
1344 | inode, | ||
1345 | parent_bh); | ||
1346 | } | ||
1347 | } | ||
1348 | } | ||
1349 | } else { | ||
1350 | /* We have reached the bottom of the tree. */ | ||
1351 | BUFFER_TRACE(parent_bh, "free data blocks"); | ||
1352 | ext4_free_data(handle, inode, parent_bh, first, last); | ||
1353 | } | ||
1354 | } | ||
1355 | |||
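ext4_free_branches() above is a depth-first, post-order walk: everything reachable through a pointer is released before the indirect block holding that pointer is freed. A minimal userspace sketch of that shape, assuming an invented fan-out of 4 and in-memory nodes standing in for the indirect blocks the kernel reads with sb_bread():

#include <stdio.h>

/* Toy stand-in for an indirect block: 4 pointer slots per level.
 * In ext4 the fan-out is EXT4_ADDR_PER_BLOCK and the "pointers"
 * are on-disk block numbers. */
#define FANOUT 4

struct blk {
	struct blk *slot[FANOUT];	/* NULL == a hole */
	int nr;				/* pretend block number */
};

/* Post-order free, mirroring ext4_free_branches(): free everything a
 * pointer reaches, then the block the pointer lives in, bottom up. */
static void free_branches(struct blk *b, int depth)
{
	if (!b)
		return;			/* a hole */
	if (depth) {
		/* walk last -> first, like the while (--p >= first) loop */
		for (int i = FANOUT - 1; i >= 0; i--)
			free_branches(b->slot[i], depth - 1);
	}
	printf("freeing block %d (level %d)\n", b->nr, depth);
}

int main(void)
{
	struct blk leaf1 = { .nr = 101 }, leaf2 = { .nr = 102 };
	struct blk ind = { .slot = { &leaf1, NULL, &leaf2 }, .nr = 50 };

	free_branches(&ind, 1);		/* leaves print before block 50 */
	return 0;
}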
1356 | void ext4_ind_truncate(struct inode *inode) | ||
1357 | { | ||
1358 | handle_t *handle; | ||
1359 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1360 | __le32 *i_data = ei->i_data; | ||
1361 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
1362 | struct address_space *mapping = inode->i_mapping; | ||
1363 | ext4_lblk_t offsets[4]; | ||
1364 | Indirect chain[4]; | ||
1365 | Indirect *partial; | ||
1366 | __le32 nr = 0; | ||
1367 | int n = 0; | ||
1368 | ext4_lblk_t last_block, max_block; | ||
1369 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
1370 | |||
1371 | handle = start_transaction(inode); | ||
1372 | if (IS_ERR(handle)) | ||
1373 | return; /* AKPM: return what? */ | ||
1374 | |||
1375 | last_block = (inode->i_size + blocksize-1) | ||
1376 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
1377 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
1378 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
1379 | |||
1380 | if (inode->i_size & (blocksize - 1)) | ||
1381 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | ||
1382 | goto out_stop; | ||
1383 | |||
1384 | if (last_block != max_block) { | ||
1385 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | ||
1386 | if (n == 0) | ||
1387 | goto out_stop; /* error */ | ||
1388 | } | ||
1389 | |||
1390 | /* | ||
1391 | * OK. This truncate is going to happen. We add the inode to the | ||
1392 | * orphan list, so that if this truncate spans multiple transactions, | ||
1393 | * and we crash, we will resume the truncate when the filesystem | ||
1394 | * recovers. It also marks the inode dirty, to catch the new size. | ||
1395 | * | ||
1396 | * Implication: the file must always be in a sane, consistent | ||
1397 | * truncatable state while each transaction commits. | ||
1398 | */ | ||
1399 | if (ext4_orphan_add(handle, inode)) | ||
1400 | goto out_stop; | ||
1401 | |||
1402 | /* | ||
1403 | * From here we block out all ext4_get_block() callers who want to | ||
1404 | * modify the block allocation tree. | ||
1405 | */ | ||
1406 | down_write(&ei->i_data_sem); | ||
1407 | |||
1408 | ext4_discard_preallocations(inode); | ||
1409 | |||
1410 | /* | ||
1411 | * The orphan list entry will now protect us from any crash which | ||
1412 | * occurs before the truncate completes, so it is now safe to propagate | ||
1413 | * the new, shorter inode size (held for now in i_size) into the | ||
1414 | * on-disk inode. We do this via i_disksize, which is the value which | ||
1415 | * ext4 *really* writes onto the disk inode. | ||
1416 | */ | ||
1417 | ei->i_disksize = inode->i_size; | ||
1418 | |||
1419 | if (last_block == max_block) { | ||
1420 | /* | ||
1421 | * It is unnecessary to free any data blocks if last_block is | ||
1422 | * equal to the indirect block limit. | ||
1423 | */ | ||
1424 | goto out_unlock; | ||
1425 | } else if (n == 1) { /* direct blocks */ | ||
1426 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | ||
1427 | i_data + EXT4_NDIR_BLOCKS); | ||
1428 | goto do_indirects; | ||
1429 | } | ||
1430 | |||
1431 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | ||
1432 | /* Kill the top of shared branch (not detached) */ | ||
1433 | if (nr) { | ||
1434 | if (partial == chain) { | ||
1435 | /* Shared branch grows from the inode */ | ||
1436 | ext4_free_branches(handle, inode, NULL, | ||
1437 | &nr, &nr+1, (chain+n-1) - partial); | ||
1438 | *partial->p = 0; | ||
1439 | /* | ||
1440 | * We mark the inode dirty prior to restart, | ||
1441 | * and prior to stop. No need for it here. | ||
1442 | */ | ||
1443 | } else { | ||
1444 | /* Shared branch grows from an indirect block */ | ||
1445 | BUFFER_TRACE(partial->bh, "get_write_access"); | ||
1446 | ext4_free_branches(handle, inode, partial->bh, | ||
1447 | partial->p, | ||
1448 | partial->p+1, (chain+n-1) - partial); | ||
1449 | } | ||
1450 | } | ||
1451 | /* Clear the ends of indirect blocks on the shared branch */ | ||
1452 | while (partial > chain) { | ||
1453 | ext4_free_branches(handle, inode, partial->bh, partial->p + 1, | ||
1454 | (__le32*)partial->bh->b_data+addr_per_block, | ||
1455 | (chain+n-1) - partial); | ||
1456 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1457 | brelse(partial->bh); | ||
1458 | partial--; | ||
1459 | } | ||
1460 | do_indirects: | ||
1461 | /* Kill the remaining (whole) subtrees */ | ||
1462 | switch (offsets[0]) { | ||
1463 | default: | ||
1464 | nr = i_data[EXT4_IND_BLOCK]; | ||
1465 | if (nr) { | ||
1466 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | ||
1467 | i_data[EXT4_IND_BLOCK] = 0; | ||
1468 | } | ||
1469 | case EXT4_IND_BLOCK: | ||
1470 | nr = i_data[EXT4_DIND_BLOCK]; | ||
1471 | if (nr) { | ||
1472 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | ||
1473 | i_data[EXT4_DIND_BLOCK] = 0; | ||
1474 | } | ||
1475 | case EXT4_DIND_BLOCK: | ||
1476 | nr = i_data[EXT4_TIND_BLOCK]; | ||
1477 | if (nr) { | ||
1478 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | ||
1479 | i_data[EXT4_TIND_BLOCK] = 0; | ||
1480 | } | ||
1481 | case EXT4_TIND_BLOCK: | ||
1482 | ; | ||
1483 | } | ||
1484 | |||
1485 | out_unlock: | ||
1486 | up_write(&ei->i_data_sem); | ||
1487 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
1488 | ext4_mark_inode_dirty(handle, inode); | ||
1489 | |||
1490 | /* | ||
1491 | * In a multi-transaction truncate, we only make the final transaction | ||
1492 | * synchronous | ||
1493 | */ | ||
1494 | if (IS_SYNC(inode)) | ||
1495 | ext4_handle_sync(handle); | ||
1496 | out_stop: | ||
1497 | /* | ||
1498 | * If this was a simple ftruncate(), and the file will remain alive | ||
1499 | * then we need to clear up the orphan record which we created above. | ||
1500 | * However, if this was a real unlink then we were called by | ||
1501 | * ext4_delete_inode(), and we allow that function to clean up the | ||
1502 | * orphan info for us. | ||
1503 | */ | ||
1504 | if (inode->i_nlink) | ||
1505 | ext4_orphan_del(handle, inode); | ||
1506 | |||
1507 | ext4_journal_stop(handle); | ||
1508 | trace_ext4_truncate_exit(inode); | ||
1509 | } | ||
1510 | |||
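The do_indirects switch above leans on case fall-through: wherever the truncate point lands, every higher-order subtree past it must be killed whole, so each case simply falls into the next. A sketch of just that control flow; free_subtree() is a hypothetical stand-in for the ext4_free_branches() calls:

#include <stdio.h>

enum { IND_BLOCK = 12, DIND_BLOCK, TIND_BLOCK };	/* as in ext4 i_data */

/* Hypothetical stand-in for freeing one whole top-level subtree. */
static void free_subtree(const char *name, int depth)
{
	printf("freeing %s subtree (depth %d)\n", name, depth);
}

/* Same fall-through shape as the do_indirects switch. */
static void kill_remaining(int first_offset)
{
	switch (first_offset) {
	default:	/* truncate point is in the direct blocks */
		free_subtree("single-indirect", 1);
		/* fall through */
	case IND_BLOCK:
		free_subtree("double-indirect", 2);
		/* fall through */
	case DIND_BLOCK:
		free_subtree("triple-indirect", 3);
		/* fall through */
	case TIND_BLOCK:
		;	/* nothing lives past the triple-indirect tree */
	}
}

int main(void)
{
	kill_remaining(IND_BLOCK);	/* frees double- and triple- only */
	return 0;
}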
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9b82ac7b0f55..de50b16a8f67 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -12,10 +12,6 @@ | |||
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * Goal-directed block allocation by Stephen Tweedie | ||
16 | * (sct@redhat.com), 1993, 1998 | ||
17 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | 15 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
20 | * (jj@sunsite.ms.mff.cuni.cz) | 16 | * (jj@sunsite.ms.mff.cuni.cz) |
21 | * | 17 | * |
@@ -90,45 +86,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) | |||
90 | } | 86 | } |
91 | 87 | ||
92 | /* | 88 | /* |
93 | * Truncate transactions can be complex and absolutely huge. So we need to | ||
94 | * be able to restart the transaction at a convenient checkpoint to make | ||
95 | * sure we don't overflow the journal. | ||
96 | * | ||
97 | * start_transaction gets us a new handle for a truncate transaction, | ||
98 | * and extend_transaction tries to extend the existing one a bit. If | ||
99 | * extend fails, we need to propagate the failure up and restart the | ||
100 | * transaction in the top-level truncate loop. --sct | ||
101 | */ | ||
102 | static handle_t *start_transaction(struct inode *inode) | ||
103 | { | ||
104 | handle_t *result; | ||
105 | |||
106 | result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); | ||
107 | if (!IS_ERR(result)) | ||
108 | return result; | ||
109 | |||
110 | ext4_std_error(inode->i_sb, PTR_ERR(result)); | ||
111 | return result; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Try to extend this transaction for the purposes of truncation. | ||
116 | * | ||
117 | * Returns 0 if we managed to create more room. If we can't create more | ||
118 | * room, and the transaction must be restarted we return 1. | ||
119 | */ | ||
120 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | ||
121 | { | ||
122 | if (!ext4_handle_valid(handle)) | ||
123 | return 0; | ||
124 | if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) | ||
125 | return 0; | ||
126 | if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) | ||
127 | return 0; | ||
128 | return 1; | ||
129 | } | ||
130 | |||
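The two helpers removed above encode the credits protocol used all along the truncate path: keep freeing while the handle has room, otherwise commit and start a fresh handle. A userspace simulation of that pattern, with invented credit numbers and work loop (the real code first tries to extend via ext4_journal_extend() before giving up):

#include <stdio.h>

/* Toy journal handle: credits are how many more metadata buffers this
 * handle may still dirty before the transaction has to commit. */
struct handle { int credits; };

#define RESERVE 8	/* stand-in for EXT4_RESERVE_TRANS_BLOCKS */

/* Mirrors try_to_extend_transaction(): 0 = enough room, 1 = caller
 * must restart. Here the journal is pretend-full, so extending is
 * never an option and low credits always force a restart. */
static int try_to_extend(struct handle *h)
{
	if (h->credits >= RESERVE + 1)
		return 0;
	return 1;
}

int main(void)
{
	struct handle h = { .credits = 20 };

	for (int blk = 0; blk < 100; blk++) {
		if (try_to_extend(&h)) {
			/* commit what we have and start over, as
			 * ext4_truncate_restart_trans() does */
			printf("restart before freeing block %d\n", blk);
			h.credits = 20;
		}
		h.credits--;	/* each freed block costs credits */
	}
	return 0;
}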
131 | /* | ||
132 | * Restart the transaction associated with *handle. This does a commit, | 89 | * Restart the transaction associated with *handle. This does a commit, |
133 | * so before we call here everything must be consistently dirtied against | 90 | * so before we call here everything must be consistently dirtied against |
134 | * this transaction. | 91 | * this transaction. |
@@ -251,760 +208,6 @@ no_delete: | |||
251 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ | 208 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
252 | } | 209 | } |
253 | 210 | ||
254 | typedef struct { | ||
255 | __le32 *p; | ||
256 | __le32 key; | ||
257 | struct buffer_head *bh; | ||
258 | } Indirect; | ||
259 | |||
260 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | ||
261 | { | ||
262 | p->key = *(p->p = v); | ||
263 | p->bh = bh; | ||
264 | } | ||
265 | |||
266 | /** | ||
267 | * ext4_block_to_path - parse the block number into array of offsets | ||
268 | * @inode: inode in question (we are only interested in its superblock) | ||
269 | * @i_block: block number to be parsed | ||
270 | * @offsets: array to store the offsets in | ||
271 | * @boundary: set this non-zero if the referred-to block is likely to be | ||
272 | * followed (on disk) by an indirect block. | ||
273 | * | ||
274 | * To store the locations of file's data ext4 uses a data structure common | ||
275 | * for UNIX filesystems - tree of pointers anchored in the inode, with | ||
276 | * data blocks at leaves and indirect blocks in intermediate nodes. | ||
277 | * This function translates the block number into path in that tree - | ||
278 | * return value is the path length and @offsets[n] is the offset of | ||
279 | * pointer to (n+1)th node in the nth one. If @block is out of range | ||
280 | * (negative or too large) warning is printed and zero returned. | ||
281 | * | ||
282 | * Note: function doesn't find node addresses, so no IO is needed. All | ||
283 | * we need to know is the capacity of indirect blocks (taken from the | ||
284 | * inode->i_sb). | ||
285 | */ | ||
286 | |||
287 | /* | ||
288 | * Portability note: the last comparison (check that we fit into triple | ||
289 | * indirect block) is spelled differently, because otherwise on an | ||
290 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | ||
291 | * if our filesystem had 8Kb blocks. We might use long long, but that would | ||
292 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | ||
293 | * i_block would have to be negative in the very beginning, so we would not | ||
294 | * get there at all. | ||
295 | */ | ||
296 | |||
297 | static int ext4_block_to_path(struct inode *inode, | ||
298 | ext4_lblk_t i_block, | ||
299 | ext4_lblk_t offsets[4], int *boundary) | ||
300 | { | ||
301 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
302 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | ||
303 | const long direct_blocks = EXT4_NDIR_BLOCKS, | ||
304 | indirect_blocks = ptrs, | ||
305 | double_blocks = (1 << (ptrs_bits * 2)); | ||
306 | int n = 0; | ||
307 | int final = 0; | ||
308 | |||
309 | if (i_block < direct_blocks) { | ||
310 | offsets[n++] = i_block; | ||
311 | final = direct_blocks; | ||
312 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | ||
313 | offsets[n++] = EXT4_IND_BLOCK; | ||
314 | offsets[n++] = i_block; | ||
315 | final = ptrs; | ||
316 | } else if ((i_block -= indirect_blocks) < double_blocks) { | ||
317 | offsets[n++] = EXT4_DIND_BLOCK; | ||
318 | offsets[n++] = i_block >> ptrs_bits; | ||
319 | offsets[n++] = i_block & (ptrs - 1); | ||
320 | final = ptrs; | ||
321 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | ||
322 | offsets[n++] = EXT4_TIND_BLOCK; | ||
323 | offsets[n++] = i_block >> (ptrs_bits * 2); | ||
324 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | ||
325 | offsets[n++] = i_block & (ptrs - 1); | ||
326 | final = ptrs; | ||
327 | } else { | ||
328 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", | ||
329 | i_block + direct_blocks + | ||
330 | indirect_blocks + double_blocks, inode->i_ino); | ||
331 | } | ||
332 | if (boundary) | ||
333 | *boundary = final - 1 - (i_block & (ptrs - 1)); | ||
334 | return n; | ||
335 | } | ||
336 | |||
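The offset arithmetic in ext4_block_to_path() is easy to check outside the kernel. A minimal sketch assuming a 4K block size, so each indirect block holds 1024 pointers (constants the kernel takes from the superblock):

#include <stdio.h>

#define NDIR_BLOCKS 12		/* EXT4_NDIR_BLOCKS */
#define PTRS        1024UL	/* 4096 / sizeof(__le32) */
#define PTRS_BITS   10

/* Same branching as ext4_block_to_path(), minus boundary handling. */
static int block_to_path(unsigned long i_block, unsigned long offsets[4])
{
	int n = 0;

	if (i_block < NDIR_BLOCKS) {
		offsets[n++] = i_block;
	} else if ((i_block -= NDIR_BLOCKS) < PTRS) {
		offsets[n++] = 12;	/* EXT4_IND_BLOCK */
		offsets[n++] = i_block;
	} else if ((i_block -= PTRS) < (1UL << (PTRS_BITS * 2))) {
		offsets[n++] = 13;	/* EXT4_DIND_BLOCK */
		offsets[n++] = i_block >> PTRS_BITS;
		offsets[n++] = i_block & (PTRS - 1);
	} else if (((i_block -= 1UL << (PTRS_BITS * 2)) >> (PTRS_BITS * 2)) < PTRS) {
		offsets[n++] = 14;	/* EXT4_TIND_BLOCK */
		offsets[n++] = i_block >> (PTRS_BITS * 2);
		offsets[n++] = (i_block >> PTRS_BITS) & (PTRS - 1);
		offsets[n++] = i_block & (PTRS - 1);
	}
	return n;			/* 0 means out of range */
}

int main(void)
{
	unsigned long offsets[4];
	/* Block 5000 lies past 12 direct + 1024 single-indirect slots,
	 * so it resolves through the double-indirect tree:
	 * 13 -> 3 -> 892. */
	int n = block_to_path(5000, offsets);

	for (int i = 0; i < n; i++)
		printf("level %d -> slot %lu\n", i, offsets[i]);
	return 0;
}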
337 | /** | ||
338 | * ext4_get_branch - read the chain of indirect blocks leading to data | ||
339 | * @inode: inode in question | ||
340 | * @depth: depth of the chain (1 - direct pointer, etc.) | ||
341 | * @offsets: offsets of pointers in inode/indirect blocks | ||
342 | * @chain: place to store the result | ||
343 | * @err: here we store the error value | ||
344 | * | ||
345 | * Function fills the array of triples <key, p, bh> and returns %NULL | ||
346 | * if everything went OK or the pointer to the last filled triple | ||
347 | * (incomplete one) otherwise. Upon the return chain[i].key contains | ||
348 | * the number of (i+1)-th block in the chain (as it is stored in memory, | ||
349 | * i.e. little-endian 32-bit), chain[i].p contains the address of that | ||
350 | * number (it points into struct inode for i==0 and into the bh->b_data | ||
351 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect | ||
352 | * block for i>0 and NULL for i==0. In other words, it holds the block | ||
353 | * numbers of the chain, addresses they were taken from (and where we can | ||
354 | * verify that chain did not change) and buffer_heads hosting these | ||
355 | * numbers. | ||
356 | * | ||
357 | * Function stops when it stumbles upon zero pointer (absent block) | ||
358 | * (pointer to last triple returned, *@err == 0) | ||
359 | * or when it gets an IO error reading an indirect block | ||
360 | * (ditto, *@err == -EIO) | ||
361 | * or when it reads all @depth-1 indirect blocks successfully and finds | ||
362 | * the whole chain, all the way to the data (returns %NULL, *err == 0). | ||
363 | * | ||
364 | * Needs to be called with | ||
365 | * down_read(&EXT4_I(inode)->i_data_sem) | ||
366 | */ | ||
367 | static Indirect *ext4_get_branch(struct inode *inode, int depth, | ||
368 | ext4_lblk_t *offsets, | ||
369 | Indirect chain[4], int *err) | ||
370 | { | ||
371 | struct super_block *sb = inode->i_sb; | ||
372 | Indirect *p = chain; | ||
373 | struct buffer_head *bh; | ||
374 | |||
375 | *err = 0; | ||
376 | /* i_data is not going away, no lock needed */ | ||
377 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); | ||
378 | if (!p->key) | ||
379 | goto no_block; | ||
380 | while (--depth) { | ||
381 | bh = sb_getblk(sb, le32_to_cpu(p->key)); | ||
382 | if (unlikely(!bh)) | ||
383 | goto failure; | ||
384 | |||
385 | if (!bh_uptodate_or_lock(bh)) { | ||
386 | if (bh_submit_read(bh) < 0) { | ||
387 | put_bh(bh); | ||
388 | goto failure; | ||
389 | } | ||
390 | /* validate block references */ | ||
391 | if (ext4_check_indirect_blockref(inode, bh)) { | ||
392 | put_bh(bh); | ||
393 | goto failure; | ||
394 | } | ||
395 | } | ||
396 | |||
397 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); | ||
398 | /* Reader: end */ | ||
399 | if (!p->key) | ||
400 | goto no_block; | ||
401 | } | ||
402 | return NULL; | ||
403 | |||
404 | failure: | ||
405 | *err = -EIO; | ||
406 | no_block: | ||
407 | return p; | ||
408 | } | ||
409 | |||
410 | /** | ||
411 | * ext4_find_near - find a place for allocation with sufficient locality | ||
412 | * @inode: owner | ||
413 | * @ind: descriptor of indirect block. | ||
414 | * | ||
415 | * This function returns the preferred place for block allocation. | ||
416 | * It is used when heuristic for sequential allocation fails. | ||
417 | * Rules are: | ||
418 | * + if there is a block to the left of our position - allocate near it. | ||
419 | * + if pointer will live in indirect block - allocate near that block. | ||
420 | * + if pointer will live in inode - allocate in the same | ||
421 | * cylinder group. | ||
422 | * | ||
423 | * In the latter case we colour the starting block by the caller's PID to | ||
424 | * prevent it from clashing with concurrent allocations for a different inode | ||
425 | * in the same block group. The PID is used here so that functionally related | ||
426 | * files will be close-by on-disk. | ||
427 | * | ||
428 | * Caller must make sure that @ind is valid and will stay that way. | ||
429 | */ | ||
430 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | ||
431 | { | ||
432 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
433 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | ||
434 | __le32 *p; | ||
435 | ext4_fsblk_t bg_start; | ||
436 | ext4_fsblk_t last_block; | ||
437 | ext4_grpblk_t colour; | ||
438 | ext4_group_t block_group; | ||
439 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
440 | |||
441 | /* Try to find previous block */ | ||
442 | for (p = ind->p - 1; p >= start; p--) { | ||
443 | if (*p) | ||
444 | return le32_to_cpu(*p); | ||
445 | } | ||
446 | |||
447 | /* No such thing, so let's try location of indirect block */ | ||
448 | if (ind->bh) | ||
449 | return ind->bh->b_blocknr; | ||
450 | |||
451 | /* | ||
452 | * It is going to be referred to from the inode itself? OK, just put it | ||
453 | * into the same cylinder group then. | ||
454 | */ | ||
455 | block_group = ei->i_block_group; | ||
456 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
457 | block_group &= ~(flex_size-1); | ||
458 | if (S_ISREG(inode->i_mode)) | ||
459 | block_group++; | ||
460 | } | ||
461 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | ||
462 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | ||
463 | |||
464 | /* | ||
465 | * If we are doing delayed allocation, we don't need to take | ||
466 | * colour into account. | ||
467 | */ | ||
468 | if (test_opt(inode->i_sb, DELALLOC)) | ||
469 | return bg_start; | ||
470 | |||
471 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | ||
472 | colour = (current->pid % 16) * | ||
473 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | ||
474 | else | ||
475 | colour = (current->pid % 16) * ((last_block - bg_start) / 16); | ||
476 | return bg_start + colour; | ||
477 | } | ||
478 | |||
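The PID colouring at the end of ext4_find_near() is plain arithmetic: sixteen stripes per group, one picked by PID. A sketch with invented group geometry showing two processes landing one stripe apart:

#include <stdio.h>

#define BLOCKS_PER_GROUP 32768UL	/* invented; read from the sb in ext4 */

/* Same colouring rule as the tail of ext4_find_near(). */
static unsigned long colour_goal(unsigned long bg_start,
				 unsigned long last_block, int pid)
{
	unsigned long colour;

	if (bg_start + BLOCKS_PER_GROUP <= last_block)
		colour = (pid % 16) * (BLOCKS_PER_GROUP / 16);
	else
		colour = (pid % 16) * ((last_block - bg_start) / 16);
	return bg_start + colour;
}

int main(void)
{
	/* Stripes are 32768 / 16 = 2048 blocks wide, so consecutive
	 * PIDs get goals 2048 blocks apart in the same group. */
	printf("pid 100 -> goal %lu\n", colour_goal(65536, 1UL << 24, 100));
	printf("pid 101 -> goal %lu\n", colour_goal(65536, 1UL << 24, 101));
	return 0;
}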
479 | /** | ||
480 | * ext4_find_goal - find a preferred place for allocation. | ||
481 | * @inode: owner | ||
482 | * @block: block we want | ||
483 | * @partial: pointer to the last triple within a chain | ||
484 | * | ||
485 | * Normally this function finds the preferred place for block allocation | ||
486 | * and returns it. | ||
487 | * Because this is only used for non-extent files, we limit the block nr | ||
488 | * to 32 bits. | ||
489 | */ | ||
490 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | ||
491 | Indirect *partial) | ||
492 | { | ||
493 | ext4_fsblk_t goal; | ||
494 | |||
495 | /* | ||
496 | * XXX need to get goal block from mballoc's data structures | ||
497 | */ | ||
498 | |||
499 | goal = ext4_find_near(inode, partial); | ||
500 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
501 | return goal; | ||
502 | } | ||
503 | |||
504 | /** | ||
505 | * ext4_blks_to_allocate - Look up the block map and count the number | ||
506 | * of direct blocks that need to be allocated for the given branch. | ||
507 | * | ||
508 | * @branch: chain of indirect blocks | ||
509 | * @k: number of blocks needed for indirect blocks | ||
510 | * @blks: number of data blocks to be mapped. | ||
511 | * @blocks_to_boundary: the offset in the indirect block | ||
512 | * | ||
513 | * return the total number of blocks to be allocated, including the | ||
514 | * direct and indirect blocks. | ||
515 | */ | ||
516 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | ||
517 | int blocks_to_boundary) | ||
518 | { | ||
519 | unsigned int count = 0; | ||
520 | |||
521 | /* | ||
522 | * Simple case: the [t,d]indirect block(s) have not been allocated yet, | ||
523 | * so it's clear the blocks on that path have not been allocated either | ||
524 | */ | ||
525 | if (k > 0) { | ||
526 | /* right now we don't handle cross boundary allocation */ | ||
527 | if (blks < blocks_to_boundary + 1) | ||
528 | count += blks; | ||
529 | else | ||
530 | count += blocks_to_boundary + 1; | ||
531 | return count; | ||
532 | } | ||
533 | |||
534 | count++; | ||
535 | while (count < blks && count <= blocks_to_boundary && | ||
536 | le32_to_cpu(*(branch[0].p + count)) == 0) { | ||
537 | count++; | ||
538 | } | ||
539 | return count; | ||
540 | } | ||
541 | |||
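For the k == 0 case, ext4_blks_to_allocate() just scans forward over zero slots in the current indirect block. A userspace check of that loop with an invented block map:

#include <stdio.h>
#include <stdint.h>

/* k == 0 branch of ext4_blks_to_allocate(): starting at branch[0].p,
 * count consecutive free (zero) slots, capped by the request size and
 * by the indirect-block boundary. */
static unsigned int blks_to_allocate(const uint32_t *p, unsigned int blks,
				     int blocks_to_boundary)
{
	unsigned int count = 1;

	while (count < blks && count <= (unsigned int)blocks_to_boundary &&
	       p[count] == 0)
		count++;
	return count;
}

int main(void)
{
	/* Slots 0..2 are free; slot 3 already maps block 9001, so a
	 * request for 8 blocks gets trimmed to 3. */
	uint32_t map[8] = { 0, 0, 0, 9001, 0, 0, 0, 0 };

	printf("allocating %u block(s)\n", blks_to_allocate(map, 8, 7));
	return 0;
}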
542 | /** | ||
543 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch | ||
544 | * @handle: handle for this transaction | ||
545 | * @inode: inode which needs allocated blocks | ||
546 | * @iblock: the logical block to start allocated at | ||
547 | * @goal: preferred physical block of allocation | ||
548 | * @indirect_blks: the number of blocks we need to allocate for indirect | ||
549 | * blocks | ||
550 | * @blks: number of desired blocks | ||
551 | * @new_blocks: on return it will store the new block numbers for | ||
552 | * the indirect blocks(if needed) and the first direct block, | ||
553 | * @err: on return it will store the error code | ||
554 | * | ||
555 | * This function will return the number of blocks allocated as | ||
556 | * requested by the passed-in parameters. | ||
557 | */ | ||
558 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | ||
559 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
560 | int indirect_blks, int blks, | ||
561 | ext4_fsblk_t new_blocks[4], int *err) | ||
562 | { | ||
563 | struct ext4_allocation_request ar; | ||
564 | int target, i; | ||
565 | unsigned long count = 0, blk_allocated = 0; | ||
566 | int index = 0; | ||
567 | ext4_fsblk_t current_block = 0; | ||
568 | int ret = 0; | ||
569 | |||
570 | /* | ||
571 | * Here we try to allocate the requested multiple blocks at once, | ||
572 | * on a best-effort basis. | ||
573 | * To build a branch, we should allocate blocks for | ||
574 | * the indirect blocks (if not allocated yet), and at least | ||
575 | * the first direct block of this branch. That's the | ||
576 | * minimum number of blocks we need to allocate (required) | ||
577 | */ | ||
578 | /* first we try to allocate the indirect blocks */ | ||
579 | target = indirect_blks; | ||
580 | while (target > 0) { | ||
581 | count = target; | ||
582 | /* allocating blocks for indirect blocks and direct blocks */ | ||
583 | current_block = ext4_new_meta_blocks(handle, inode, goal, | ||
584 | 0, &count, err); | ||
585 | if (*err) | ||
586 | goto failed_out; | ||
587 | |||
588 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { | ||
589 | EXT4_ERROR_INODE(inode, | ||
590 | "current_block %llu + count %lu > %d!", | ||
591 | current_block, count, | ||
592 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
593 | *err = -EIO; | ||
594 | goto failed_out; | ||
595 | } | ||
596 | |||
597 | target -= count; | ||
598 | /* allocate blocks for indirect blocks */ | ||
599 | while (index < indirect_blks && count) { | ||
600 | new_blocks[index++] = current_block++; | ||
601 | count--; | ||
602 | } | ||
603 | if (count > 0) { | ||
604 | /* | ||
605 | * save the new block number | ||
606 | * for the first direct block | ||
607 | */ | ||
608 | new_blocks[index] = current_block; | ||
609 | printk(KERN_INFO "%s returned more blocks than " | ||
610 | "requested\n", __func__); | ||
611 | WARN_ON(1); | ||
612 | break; | ||
613 | } | ||
614 | } | ||
615 | |||
616 | target = blks - count; | ||
617 | blk_allocated = count; | ||
618 | if (!target) | ||
619 | goto allocated; | ||
620 | /* Now allocate data blocks */ | ||
621 | memset(&ar, 0, sizeof(ar)); | ||
622 | ar.inode = inode; | ||
623 | ar.goal = goal; | ||
624 | ar.len = target; | ||
625 | ar.logical = iblock; | ||
626 | if (S_ISREG(inode->i_mode)) | ||
627 | /* enable in-core preallocation only for regular files */ | ||
628 | ar.flags = EXT4_MB_HINT_DATA; | ||
629 | |||
630 | current_block = ext4_mb_new_blocks(handle, &ar, err); | ||
631 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { | ||
632 | EXT4_ERROR_INODE(inode, | ||
633 | "current_block %llu + ar.len %d > %d!", | ||
634 | current_block, ar.len, | ||
635 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
636 | *err = -EIO; | ||
637 | goto failed_out; | ||
638 | } | ||
639 | |||
640 | if (*err && (target == blks)) { | ||
641 | /* | ||
642 | * if the allocation failed and we didn't allocate | ||
643 | * any blocks before | ||
644 | */ | ||
645 | goto failed_out; | ||
646 | } | ||
647 | if (!*err) { | ||
648 | if (target == blks) { | ||
649 | /* | ||
650 | * save the new block number | ||
651 | * for the first direct block | ||
652 | */ | ||
653 | new_blocks[index] = current_block; | ||
654 | } | ||
655 | blk_allocated += ar.len; | ||
656 | } | ||
657 | allocated: | ||
658 | /* total number of blocks allocated for direct blocks */ | ||
659 | ret = blk_allocated; | ||
660 | *err = 0; | ||
661 | return ret; | ||
662 | failed_out: | ||
663 | for (i = 0; i < index; i++) | ||
664 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); | ||
665 | return ret; | ||
666 | } | ||
667 | |||
668 | /** | ||
669 | * ext4_alloc_branch - allocate and set up a chain of blocks. | ||
670 | * @handle: handle for this transaction | ||
671 | * @inode: owner | ||
672 | * @indirect_blks: number of allocated indirect blocks | ||
673 | * @blks: number of allocated direct blocks | ||
674 | * @goal: preferred place for allocation | ||
675 | * @offsets: offsets (in the blocks) to store the pointers to next. | ||
676 | * @branch: place to store the chain in. | ||
677 | * | ||
678 | * This function allocates blocks, zeroes out all but the last one, | ||
679 | * links them into chain and (if we are synchronous) writes them to disk. | ||
680 | * In other words, it prepares a branch that can be spliced onto the | ||
681 | * inode. It stores the information about that chain in the branch[], in | ||
682 | * the same format as ext4_get_branch() would do. We are calling it after | ||
683 | * we had read the existing part of chain and partial points to the last | ||
684 | * triple of that (one with zero ->key). Upon the exit we have the same | ||
685 | * picture as after the successful ext4_get_block(), except that in one | ||
686 | * place chain is disconnected - *branch->p is still zero (we did not | ||
687 | * set the last link), but branch->key contains the number that should | ||
688 | * be placed into *branch->p to fill that gap. | ||
689 | * | ||
690 | * If allocation fails we free all blocks we've allocated (and forget | ||
691 | * their buffer_heads) and return the error value from the failed | ||
692 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain | ||
693 | * as described above and return 0. | ||
694 | */ | ||
695 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | ||
696 | ext4_lblk_t iblock, int indirect_blks, | ||
697 | int *blks, ext4_fsblk_t goal, | ||
698 | ext4_lblk_t *offsets, Indirect *branch) | ||
699 | { | ||
700 | int blocksize = inode->i_sb->s_blocksize; | ||
701 | int i, n = 0; | ||
702 | int err = 0; | ||
703 | struct buffer_head *bh; | ||
704 | int num; | ||
705 | ext4_fsblk_t new_blocks[4]; | ||
706 | ext4_fsblk_t current_block; | ||
707 | |||
708 | num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, | ||
709 | *blks, new_blocks, &err); | ||
710 | if (err) | ||
711 | return err; | ||
712 | |||
713 | branch[0].key = cpu_to_le32(new_blocks[0]); | ||
714 | /* | ||
715 | * metadata blocks and data blocks are allocated. | ||
716 | */ | ||
717 | for (n = 1; n <= indirect_blks; n++) { | ||
718 | /* | ||
719 | * Get buffer_head for parent block, zero it out | ||
720 | * and set the pointer to new one, then send | ||
721 | * parent to disk. | ||
722 | */ | ||
723 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | ||
724 | if (unlikely(!bh)) { | ||
725 | err = -EIO; | ||
726 | goto failed; | ||
727 | } | ||
728 | |||
729 | branch[n].bh = bh; | ||
730 | lock_buffer(bh); | ||
731 | BUFFER_TRACE(bh, "call get_create_access"); | ||
732 | err = ext4_journal_get_create_access(handle, bh); | ||
733 | if (err) { | ||
734 | /* Don't brelse(bh) here; it's done in | ||
735 | * ext4_journal_forget() below */ | ||
736 | unlock_buffer(bh); | ||
737 | goto failed; | ||
738 | } | ||
739 | |||
740 | memset(bh->b_data, 0, blocksize); | ||
741 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | ||
742 | branch[n].key = cpu_to_le32(new_blocks[n]); | ||
743 | *branch[n].p = branch[n].key; | ||
744 | if (n == indirect_blks) { | ||
745 | current_block = new_blocks[n]; | ||
746 | /* | ||
747 | * End of chain, update the last new metablock of | ||
748 | * the chain to point to the newly allocated | ||
749 | * data block numbers | ||
750 | */ | ||
751 | for (i = 1; i < num; i++) | ||
752 | *(branch[n].p + i) = cpu_to_le32(++current_block); | ||
753 | } | ||
754 | BUFFER_TRACE(bh, "marking uptodate"); | ||
755 | set_buffer_uptodate(bh); | ||
756 | unlock_buffer(bh); | ||
757 | |||
758 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
759 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
760 | if (err) | ||
761 | goto failed; | ||
762 | } | ||
763 | *blks = num; | ||
764 | return err; | ||
765 | failed: | ||
766 | /* Allocation failed, free what we already allocated */ | ||
767 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); | ||
768 | for (i = 1; i <= n; i++) { | ||
769 | /* | ||
770 | * branch[i].bh is newly allocated, so there is no | ||
771 | * need to revoke the block, which is why we don't | ||
772 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
773 | */ | ||
774 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, | ||
775 | EXT4_FREE_BLOCKS_FORGET); | ||
776 | } | ||
777 | for (i = n+1; i < indirect_blks; i++) | ||
778 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); | ||
779 | |||
780 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); | ||
781 | |||
782 | return err; | ||
783 | } | ||
784 | |||
785 | /** | ||
786 | * ext4_splice_branch - splice the allocated branch onto inode. | ||
787 | * @handle: handle for this transaction | ||
788 | * @inode: owner | ||
789 | * @block: (logical) number of block we are adding | ||
790 | * @chain: chain of indirect blocks (with a missing link - see | ||
791 | * ext4_alloc_branch) | ||
792 | * @where: location of missing link | ||
793 | * @num: number of indirect blocks we are adding | ||
794 | * @blks: number of direct blocks we are adding | ||
795 | * | ||
796 | * This function fills the missing link and does all housekeeping needed in | ||
797 | * inode (->i_blocks, etc.). In case of success we end up with the full | ||
798 | * chain to new block and return 0. | ||
799 | */ | ||
800 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | ||
801 | ext4_lblk_t block, Indirect *where, int num, | ||
802 | int blks) | ||
803 | { | ||
804 | int i; | ||
805 | int err = 0; | ||
806 | ext4_fsblk_t current_block; | ||
807 | |||
808 | /* | ||
809 | * If we're splicing into a [td]indirect block (as opposed to the | ||
810 | * inode) then we need to get write access to the [td]indirect block | ||
811 | * before the splice. | ||
812 | */ | ||
813 | if (where->bh) { | ||
814 | BUFFER_TRACE(where->bh, "get_write_access"); | ||
815 | err = ext4_journal_get_write_access(handle, where->bh); | ||
816 | if (err) | ||
817 | goto err_out; | ||
818 | } | ||
819 | /* That's it */ | ||
820 | |||
821 | *where->p = where->key; | ||
822 | |||
823 | /* | ||
824 | * Update the host buffer_head or inode to point to the just-allocated | ||
825 | * direct blocks | ||
826 | */ | ||
827 | if (num == 0 && blks > 1) { | ||
828 | current_block = le32_to_cpu(where->key) + 1; | ||
829 | for (i = 1; i < blks; i++) | ||
830 | *(where->p + i) = cpu_to_le32(current_block++); | ||
831 | } | ||
832 | |||
833 | /* We are done with atomic stuff, now do the rest of housekeeping */ | ||
834 | /* had we spliced it onto indirect block? */ | ||
835 | if (where->bh) { | ||
836 | /* | ||
837 | * If we spliced it onto an indirect block, we haven't | ||
838 | * altered the inode. Note however that if it is being spliced | ||
839 | * onto an indirect block at the very end of the file (the | ||
840 | * file is growing) then we *will* alter the inode to reflect | ||
841 | * the new i_size. But that is not done here - it is done in | ||
842 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. | ||
843 | */ | ||
844 | jbd_debug(5, "splicing indirect only\n"); | ||
845 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | ||
846 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); | ||
847 | if (err) | ||
848 | goto err_out; | ||
849 | } else { | ||
850 | /* | ||
851 | * OK, we spliced it into the inode itself on a direct block. | ||
852 | */ | ||
853 | ext4_mark_inode_dirty(handle, inode); | ||
854 | jbd_debug(5, "splicing direct\n"); | ||
855 | } | ||
856 | return err; | ||
857 | |||
858 | err_out: | ||
859 | for (i = 1; i <= num; i++) { | ||
860 | /* | ||
861 | * branch[i].bh is newly allocated, so there is no | ||
862 | * need to revoke the block, which is why we don't | ||
863 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
864 | */ | ||
865 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | ||
866 | EXT4_FREE_BLOCKS_FORGET); | ||
867 | } | ||
868 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), | ||
869 | blks, 0); | ||
870 | |||
871 | return err; | ||
872 | } | ||
873 | |||
874 | /* | ||
875 | * The ext4_ind_map_blocks() function handles non-extents inodes | ||
876 | * (i.e., using the traditional indirect/double-indirect i_blocks | ||
877 | * scheme) for ext4_map_blocks(). | ||
878 | * | ||
879 | * Allocation strategy is simple: if we have to allocate something, we will | ||
880 | * have to go the whole way to leaf. So let's do it before attaching anything | ||
881 | * to tree, set linkage between the newborn blocks, write them if sync is | ||
882 | * required, recheck the path, free and repeat if check fails, otherwise | ||
883 | * set the last missing link (that will protect us from any truncate-generated | ||
884 | * removals - all blocks on the path are immune now) and possibly force the | ||
885 | * write on the parent block. | ||
886 | * That has a nice additional property: no special recovery from the failed | ||
887 | * allocations is needed - we simply release blocks and do not touch anything | ||
888 | * reachable from inode. | ||
889 | * | ||
890 | * `handle' can be NULL if create == 0. | ||
891 | * | ||
892 | * return > 0, # of blocks mapped or allocated. | ||
893 | * return = 0, if plain lookup failed. | ||
894 | * return < 0, error case. | ||
895 | * | ||
896 | * The ext4_ind_map_blocks() function should be called with | ||
897 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem | ||
898 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or | ||
899 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | ||
900 | * blocks. | ||
901 | */ | ||
902 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | ||
903 | struct ext4_map_blocks *map, | ||
904 | int flags) | ||
905 | { | ||
906 | int err = -EIO; | ||
907 | ext4_lblk_t offsets[4]; | ||
908 | Indirect chain[4]; | ||
909 | Indirect *partial; | ||
910 | ext4_fsblk_t goal; | ||
911 | int indirect_blks; | ||
912 | int blocks_to_boundary = 0; | ||
913 | int depth; | ||
914 | int count = 0; | ||
915 | ext4_fsblk_t first_block = 0; | ||
916 | |||
917 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
918 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | ||
919 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | ||
920 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | ||
921 | &blocks_to_boundary); | ||
922 | |||
923 | if (depth == 0) | ||
924 | goto out; | ||
925 | |||
926 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); | ||
927 | |||
928 | /* Simplest case - block found, no allocation needed */ | ||
929 | if (!partial) { | ||
930 | first_block = le32_to_cpu(chain[depth - 1].key); | ||
931 | count++; | ||
932 | /* map more blocks */ | ||
933 | while (count < map->m_len && count <= blocks_to_boundary) { | ||
934 | ext4_fsblk_t blk; | ||
935 | |||
936 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | ||
937 | |||
938 | if (blk == first_block + count) | ||
939 | count++; | ||
940 | else | ||
941 | break; | ||
942 | } | ||
943 | goto got_it; | ||
944 | } | ||
945 | |||
946 | /* Next simple case - plain lookup or failed read of indirect block */ | ||
947 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) | ||
948 | goto cleanup; | ||
949 | |||
950 | /* | ||
951 | * Okay, we need to do block allocation. | ||
952 | */ | ||
953 | goal = ext4_find_goal(inode, map->m_lblk, partial); | ||
954 | |||
955 | /* the number of blocks we need to allocate for [d,t]indirect blocks */ | ||
956 | indirect_blks = (chain + depth) - partial - 1; | ||
957 | |||
958 | /* | ||
959 | * Next look up the indirect map to count the total number of | ||
960 | * direct blocks to allocate for this branch. | ||
961 | */ | ||
962 | count = ext4_blks_to_allocate(partial, indirect_blks, | ||
963 | map->m_len, blocks_to_boundary); | ||
964 | /* | ||
965 | * Block out ext4_truncate while we alter the tree | ||
966 | */ | ||
967 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, | ||
968 | &count, goal, | ||
969 | offsets + (partial - chain), partial); | ||
970 | |||
971 | /* | ||
972 | * The ext4_splice_branch call will free and forget any buffers | ||
973 | * on the new chain if there is a failure, but that risks using | ||
974 | * up transaction credits, especially for bitmaps where the | ||
975 | * credits cannot be returned. Can we handle this somehow? We | ||
976 | * may need to return -EAGAIN upwards in the worst case. --sct | ||
977 | */ | ||
978 | if (!err) | ||
979 | err = ext4_splice_branch(handle, inode, map->m_lblk, | ||
980 | partial, indirect_blks, count); | ||
981 | if (err) | ||
982 | goto cleanup; | ||
983 | |||
984 | map->m_flags |= EXT4_MAP_NEW; | ||
985 | |||
986 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
987 | got_it: | ||
988 | map->m_flags |= EXT4_MAP_MAPPED; | ||
989 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | ||
990 | map->m_len = count; | ||
991 | if (count > blocks_to_boundary) | ||
992 | map->m_flags |= EXT4_MAP_BOUNDARY; | ||
993 | err = count; | ||
994 | /* Clean up and exit */ | ||
995 | partial = chain + depth - 1; /* the whole chain */ | ||
996 | cleanup: | ||
997 | while (partial > chain) { | ||
998 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
999 | brelse(partial->bh); | ||
1000 | partial--; | ||
1001 | } | ||
1002 | out: | ||
1003 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
1004 | map->m_pblk, map->m_len, err); | ||
1005 | return err; | ||
1006 | } | ||
1007 | |||
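The lookup half of ext4_ind_map_blocks() — find the first block, then grow the mapping while on-disk pointers stay physically contiguous — can be simulated with one fake indirect block; journalling, holes and the allocation path are omitted:

#include <stdio.h>
#include <stdint.h>

/* One invented indirect block: slot i holds the physical block for
 * logical block i; zero would mean a hole. */
static const uint32_t ind_block[16] = {
	500, 501, 502, 503, 504, 900, 901, 0,
};

/* The "map more blocks" loop of ext4_ind_map_blocks(): extend the
 * mapping while the pointers stay contiguous on disk. */
static unsigned map_contiguous(unsigned lblk, unsigned want, uint32_t *pblk)
{
	uint32_t first = ind_block[lblk];
	unsigned count = 1;

	while (count < want && ind_block[lblk + count] == first + count)
		count++;
	*pblk = first;
	return count;
}

int main(void)
{
	uint32_t pblk;
	/* Ask for 8 blocks at logical 0: only 500..504 are contiguous,
	 * so the caller gets a 5-block mapping and must call again. */
	unsigned n = map_contiguous(0, 8, &pblk);

	printf("mapped %u block(s) at physical %u\n", n, (unsigned)pblk);
	return 0;
}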
1008 | #ifdef CONFIG_QUOTA | 211 | #ifdef CONFIG_QUOTA |
1009 | qsize_t *ext4_get_reserved_space(struct inode *inode) | 212 | qsize_t *ext4_get_reserved_space(struct inode *inode) |
1010 | { | 213 | { |
@@ -1014,32 +217,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) | |||
1014 | 217 | ||
1015 | /* | 218 | /* |
1016 | * Calculate the number of metadata blocks needed to reserve | 219 | * Calculate the number of metadata blocks needed to reserve |
1017 | * to allocate a new block at @lblock for a non-extent based file | ||
1018 | */ | ||
1019 | static int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) | ||
1020 | { | ||
1021 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1022 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); | ||
1023 | int blk_bits; | ||
1024 | |||
1025 | if (lblock < EXT4_NDIR_BLOCKS) | ||
1026 | return 0; | ||
1027 | |||
1028 | lblock -= EXT4_NDIR_BLOCKS; | ||
1029 | |||
1030 | if (ei->i_da_metadata_calc_len && | ||
1031 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | ||
1032 | ei->i_da_metadata_calc_len++; | ||
1033 | return 0; | ||
1034 | } | ||
1035 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | ||
1036 | ei->i_da_metadata_calc_len = 1; | ||
1037 | blk_bits = order_base_2(lblock); | ||
1038 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | ||
1039 | } | ||
1040 | |||
1041 | /* | ||
1042 | * Calculate the number of metadata blocks needed to reserve | ||
1043 | * to allocate a block located at @lblock | 220 | * to allocate a block located at @lblock |
1044 | */ | 221 | */ |
1045 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | 222 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
@@ -3380,114 +2557,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3380 | } | 2557 | } |
3381 | 2558 | ||
3382 | /* | 2559 | /* |
3383 | * O_DIRECT for ext3 (or indirect map) based files | ||
3384 | * | ||
3385 | * If the O_DIRECT write will extend the file then add this inode to the | ||
3386 | * orphan list. So recovery will truncate it back to the original size | ||
3387 | * if the machine crashes during the write. | ||
3388 | * | ||
3389 | * If the O_DIRECT write is instantiating holes inside i_size and the machine | ||
3390 | * crashes then stale disk data _may_ be exposed inside the file. But current | ||
3391 | * VFS code falls back into buffered path in that case so we are safe. | ||
3392 | */ | ||
3393 | static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | ||
3394 | const struct iovec *iov, loff_t offset, | ||
3395 | unsigned long nr_segs) | ||
3396 | { | ||
3397 | struct file *file = iocb->ki_filp; | ||
3398 | struct inode *inode = file->f_mapping->host; | ||
3399 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3400 | handle_t *handle; | ||
3401 | ssize_t ret; | ||
3402 | int orphan = 0; | ||
3403 | size_t count = iov_length(iov, nr_segs); | ||
3404 | int retries = 0; | ||
3405 | |||
3406 | if (rw == WRITE) { | ||
3407 | loff_t final_size = offset + count; | ||
3408 | |||
3409 | if (final_size > inode->i_size) { | ||
3410 | /* Credits for sb + inode write */ | ||
3411 | handle = ext4_journal_start(inode, 2); | ||
3412 | if (IS_ERR(handle)) { | ||
3413 | ret = PTR_ERR(handle); | ||
3414 | goto out; | ||
3415 | } | ||
3416 | ret = ext4_orphan_add(handle, inode); | ||
3417 | if (ret) { | ||
3418 | ext4_journal_stop(handle); | ||
3419 | goto out; | ||
3420 | } | ||
3421 | orphan = 1; | ||
3422 | ei->i_disksize = inode->i_size; | ||
3423 | ext4_journal_stop(handle); | ||
3424 | } | ||
3425 | } | ||
3426 | |||
3427 | retry: | ||
3428 | if (rw == READ && ext4_should_dioread_nolock(inode)) | ||
3429 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
3430 | inode->i_sb->s_bdev, iov, | ||
3431 | offset, nr_segs, | ||
3432 | ext4_get_block, NULL, NULL, 0); | ||
3433 | else { | ||
3434 | ret = blockdev_direct_IO(rw, iocb, inode, | ||
3435 | inode->i_sb->s_bdev, iov, | ||
3436 | offset, nr_segs, | ||
3437 | ext4_get_block, NULL); | ||
3438 | |||
3439 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
3440 | loff_t isize = i_size_read(inode); | ||
3441 | loff_t end = offset + iov_length(iov, nr_segs); | ||
3442 | |||
3443 | if (end > isize) | ||
3444 | ext4_truncate_failed_write(inode); | ||
3445 | } | ||
3446 | } | ||
3447 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
3448 | goto retry; | ||
3449 | |||
3450 | if (orphan) { | ||
3451 | int err; | ||
3452 | |||
3453 | /* Credits for sb + inode write */ | ||
3454 | handle = ext4_journal_start(inode, 2); | ||
3455 | if (IS_ERR(handle)) { | ||
3456 | /* This is really bad luck. We've written the data | ||
3457 | * but cannot extend i_size. Bail out and pretend | ||
3458 | * the write failed... */ | ||
3459 | ret = PTR_ERR(handle); | ||
3460 | if (inode->i_nlink) | ||
3461 | ext4_orphan_del(NULL, inode); | ||
3462 | |||
3463 | goto out; | ||
3464 | } | ||
3465 | if (inode->i_nlink) | ||
3466 | ext4_orphan_del(handle, inode); | ||
3467 | if (ret > 0) { | ||
3468 | loff_t end = offset + ret; | ||
3469 | if (end > inode->i_size) { | ||
3470 | ei->i_disksize = end; | ||
3471 | i_size_write(inode, end); | ||
3472 | /* | ||
3473 | * We're going to return a positive `ret' | ||
3474 | * here due to non-zero-length I/O, so there's | ||
3475 | * no way of reporting error returns from | ||
3476 | * ext4_mark_inode_dirty() to userspace. So | ||
3477 | * ignore it. | ||
3478 | */ | ||
3479 | ext4_mark_inode_dirty(handle, inode); | ||
3480 | } | ||
3481 | } | ||
3482 | err = ext4_journal_stop(handle); | ||
3483 | if (ret == 0) | ||
3484 | ret = err; | ||
3485 | } | ||
3486 | out: | ||
3487 | return ret; | ||
3488 | } | ||
3489 | |||
3490 | /* | ||
3491 | * ext4_get_block used when preparing for a DIO write or buffer write. | 2560 | * ext4_get_block used when preparing for a DIO write or buffer write. |
3492 | * We allocate an uninitialized extent if blocks haven't been allocated. | 2561 | * We allocate an uninitialized extent if blocks haven't been allocated. |
3493 | * The extent will be converted to initialized after the IO is complete. | 2562 | * The extent will be converted to initialized after the IO is complete. |
@@ -3958,383 +3027,6 @@ unlock: | |||
3958 | return err; | 3027 | return err; |
3959 | } | 3028 | } |
3960 | 3029 | ||
3961 | /* | ||
3962 | * Probably it should be a library function... search for first non-zero word | ||
3963 | * or memcmp with zero_page, whatever is better for particular architecture. | ||
3964 | * Linus? | ||
3965 | */ | ||
3966 | static inline int all_zeroes(__le32 *p, __le32 *q) | ||
3967 | { | ||
3968 | while (p < q) | ||
3969 | if (*p++) | ||
3970 | return 0; | ||
3971 | return 1; | ||
3972 | } | ||
3973 | |||
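The alternative the comment muses about is easy to try: a word-by-word scan and memcmp() against a zero page answer the same question. A quick userspace check with an invented pointer block:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Same loop as all_zeroes() above. */
static int all_zeroes(const uint32_t *p, const uint32_t *q)
{
	while (p < q)
		if (*p++)
			return 0;
	return 1;
}

int main(void)
{
	static const uint32_t zero_page[1024];	/* stand-in zero block */
	uint32_t blk[1024] = { 0 };

	blk[700] = 42;	/* one live pointer near the end */

	/* Both report "not all zeroes" (0). */
	printf("scan: %d  memcmp: %d\n",
	       all_zeroes(blk, blk + 1024),
	       memcmp(blk, zero_page, sizeof(blk)) == 0);
	return 0;
}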
3974 | /** | ||
3975 | * ext4_find_shared - find the indirect blocks for partial truncation. | ||
3976 | * @inode: inode in question | ||
3977 | * @depth: depth of the affected branch | ||
3978 | * @offsets: offsets of pointers in that branch (see ext4_block_to_path) | ||
3979 | * @chain: place to store the pointers to partial indirect blocks | ||
3980 | * @top: place to the (detached) top of branch | ||
3981 | * | ||
3982 | * This is a helper function used by ext4_truncate(). | ||
3983 | * | ||
3984 | * When we do truncate() we may have to clean the ends of several | ||
3985 | * indirect blocks but leave the blocks themselves alive. Block is | ||
3986 | * partially truncated if some data below the new i_size is referred | ||
3987 | * from it (and it is on the path to the first completely truncated | ||
3988 | * data block, indeed). We have to free the top of that path along | ||
3989 | * with everything to the right of the path. Since no allocation | ||
3990 | * past the truncation point is possible until ext4_truncate() | ||
3991 | * finishes, we may safely do the latter, but top of branch may | ||
3992 | * require special attention - pageout below the truncation point | ||
3993 | * might try to populate it. | ||
3994 | * | ||
3995 | * We atomically detach the top of branch from the tree, store the | ||
3996 | * block number of its root in *@top, pointers to buffer_heads of | ||
3997 | * partially truncated blocks - in @chain[].bh and pointers to | ||
3998 | * their last elements that should not be removed - in | ||
3999 | * @chain[].p. Return value is the pointer to last filled element | ||
4000 | * of @chain. | ||
4001 | * | ||
4002 | * The work left to the caller is the actual freeing of subtrees: | ||
4003 | * a) free the subtree starting from *@top | ||
4004 | * b) free the subtrees whose roots are stored in | ||
4005 | * (@chain[i].p+1 .. end of @chain[i].bh->b_data) | ||
4006 | * c) free the subtrees growing from the inode past the @chain[0]. | ||
4007 | * (no partially truncated stuff there). */ | ||
4008 | |||
4009 | static Indirect *ext4_find_shared(struct inode *inode, int depth, | ||
4010 | ext4_lblk_t offsets[4], Indirect chain[4], | ||
4011 | __le32 *top) | ||
4012 | { | ||
4013 | Indirect *partial, *p; | ||
4014 | int k, err; | ||
4015 | |||
4016 | *top = 0; | ||
4017 | /* Make k index the deepest non-null offset + 1 */ | ||
4018 | for (k = depth; k > 1 && !offsets[k-1]; k--) | ||
4019 | ; | ||
4020 | partial = ext4_get_branch(inode, k, offsets, chain, &err); | ||
4021 | /* Writer: pointers */ | ||
4022 | if (!partial) | ||
4023 | partial = chain + k-1; | ||
4024 | /* | ||
4025 | * If the branch acquired continuation since we've looked at it - | ||
4026 | * fine, it should all survive and (new) top doesn't belong to us. | ||
4027 | */ | ||
4028 | if (!partial->key && *partial->p) | ||
4029 | /* Writer: end */ | ||
4030 | goto no_top; | ||
4031 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) | ||
4032 | ; | ||
4033 | /* | ||
4034 | * OK, we've found the last block that must survive. The rest of our | ||
4035 | * branch should be detached before unlocking. However, if that rest | ||
4036 | * of branch is all ours and does not grow immediately from the inode | ||
4037 | * it's easier to cheat and just decrement partial->p. | ||
4038 | */ | ||
4039 | if (p == chain + k - 1 && p > chain) { | ||
4040 | p->p--; | ||
4041 | } else { | ||
4042 | *top = *p->p; | ||
4043 | /* Nope, don't do this in ext4. Must leave the tree intact */ | ||
4044 | #if 0 | ||
4045 | *p->p = 0; | ||
4046 | #endif | ||
4047 | } | ||
4048 | /* Writer: end */ | ||
4049 | |||
4050 | while (partial > p) { | ||
4051 | brelse(partial->bh); | ||
4052 | partial--; | ||
4053 | } | ||
4054 | no_top: | ||
4055 | return partial; | ||
4056 | } | ||
4057 | |||
4058 | /* | ||
4059 | * Zero a number of block pointers in either an inode or an indirect block. | ||
4060 | * If we restart the transaction we must again get write access to the | ||
4061 | * indirect block for further modification. | ||
4062 | * | ||
4063 | * We release `count' blocks on disk, but (last - first) may be greater | ||
4064 | * than `count' because there can be holes in there. | ||
4065 | * | ||
4066 | * Return 0 on success, 1 on invalid block range | ||
4067 | * and < 0 on fatal error. | ||
4068 | */ | ||
4069 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | ||
4070 | struct buffer_head *bh, | ||
4071 | ext4_fsblk_t block_to_free, | ||
4072 | unsigned long count, __le32 *first, | ||
4073 | __le32 *last) | ||
4074 | { | ||
4075 | __le32 *p; | ||
4076 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | ||
4077 | int err; | ||
4078 | |||
4079 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
4080 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
4081 | |||
4082 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
4083 | count)) { | ||
4084 | EXT4_ERROR_INODE(inode, "attempt to clear invalid " | ||
4085 | "blocks %llu len %lu", | ||
4086 | (unsigned long long) block_to_free, count); | ||
4087 | return 1; | ||
4088 | } | ||
4089 | |||
4090 | if (try_to_extend_transaction(handle, inode)) { | ||
4091 | if (bh) { | ||
4092 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
4093 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
4094 | if (unlikely(err)) | ||
4095 | goto out_err; | ||
4096 | } | ||
4097 | err = ext4_mark_inode_dirty(handle, inode); | ||
4098 | if (unlikely(err)) | ||
4099 | goto out_err; | ||
4100 | err = ext4_truncate_restart_trans(handle, inode, | ||
4101 | ext4_blocks_for_truncate(inode)); | ||
4102 | if (unlikely(err)) | ||
4103 | goto out_err; | ||
4104 | if (bh) { | ||
4105 | BUFFER_TRACE(bh, "retaking write access"); | ||
4106 | err = ext4_journal_get_write_access(handle, bh); | ||
4107 | if (unlikely(err)) | ||
4108 | goto out_err; | ||
4109 | } | ||
4110 | } | ||
4111 | |||
4112 | for (p = first; p < last; p++) | ||
4113 | *p = 0; | ||
4114 | |||
4115 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); | ||
4116 | return 0; | ||
4117 | out_err: | ||
4118 | ext4_std_error(inode->i_sb, err); | ||
4119 | return err; | ||
4120 | } | ||
4121 | |||
4122 | /** | ||
4123 | * ext4_free_data - free a list of data blocks | ||
4124 | * @handle: handle for this transaction | ||
4125 | * @inode: inode we are dealing with | ||
4126 | * @this_bh: indirect buffer_head which contains *@first and *@last | ||
4127 | * @first: array of block numbers | ||
4128 | * @last: points immediately past the end of array | ||
4129 | * | ||
4130 | * We are freeing all blocks referred from that array (numbers are stored as | ||
4131 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | ||
4132 | * | ||
4133 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | ||
4134 | * blocks are contiguous then releasing them at one time will only affect one | ||
4135 | * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't | ||
4136 | * actually use a lot of journal space. | ||
4137 | * | ||
4138 | * @this_bh will be %NULL if @first and @last point into the inode's direct | ||
4139 | * block pointers. | ||
4140 | */ | ||
4141 | static void ext4_free_data(handle_t *handle, struct inode *inode, | ||
4142 | struct buffer_head *this_bh, | ||
4143 | __le32 *first, __le32 *last) | ||
4144 | { | ||
4145 | ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ | ||
4146 | unsigned long count = 0; /* Number of blocks in the run */ | ||
4147 | __le32 *block_to_free_p = NULL; /* Pointer into inode/ind | ||
4148 | corresponding to | ||
4149 | block_to_free */ | ||
4150 | ext4_fsblk_t nr; /* Current block # */ | ||
4151 | __le32 *p; /* Pointer into inode/ind | ||
4152 | for current block */ | ||
4153 | int err = 0; | ||
4154 | |||
4155 | if (this_bh) { /* For indirect block */ | ||
4156 | BUFFER_TRACE(this_bh, "get_write_access"); | ||
4157 | err = ext4_journal_get_write_access(handle, this_bh); | ||
4158 | /* Important: if we can't update the indirect pointers | ||
4159 | * to the blocks, we can't free them. */ | ||
4160 | if (err) | ||
4161 | return; | ||
4162 | } | ||
4163 | |||
4164 | for (p = first; p < last; p++) { | ||
4165 | nr = le32_to_cpu(*p); | ||
4166 | if (nr) { | ||
4167 | /* accumulate blocks to free if they're contiguous */ | ||
4168 | if (count == 0) { | ||
4169 | block_to_free = nr; | ||
4170 | block_to_free_p = p; | ||
4171 | count = 1; | ||
4172 | } else if (nr == block_to_free + count) { | ||
4173 | count++; | ||
4174 | } else { | ||
4175 | err = ext4_clear_blocks(handle, inode, this_bh, | ||
4176 | block_to_free, count, | ||
4177 | block_to_free_p, p); | ||
4178 | if (err) | ||
4179 | break; | ||
4180 | block_to_free = nr; | ||
4181 | block_to_free_p = p; | ||
4182 | count = 1; | ||
4183 | } | ||
4184 | } | ||
4185 | } | ||
4186 | |||
4187 | if (!err && count > 0) | ||
4188 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, | ||
4189 | count, block_to_free_p, p); | ||
4190 | if (err < 0) | ||
4191 | /* fatal error */ | ||
4192 | return; | ||
4193 | |||
4194 | if (this_bh) { | ||
4195 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | ||
4196 | |||
4197 | /* | ||
4198 | * The buffer head should have an attached journal head at this | ||
4199 | * point. However, if the data is corrupted and an indirect | ||
4200 | * block pointed to itself, it would have been detached when | ||
4201 | * the block was cleared. Check for this instead of OOPSing. | ||
4202 | */ | ||
4203 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | ||
4204 | ext4_handle_dirty_metadata(handle, inode, this_bh); | ||
4205 | else | ||
4206 | EXT4_ERROR_INODE(inode, | ||
4207 | "circular indirect block detected at " | ||
4208 | "block %llu", | ||
4209 | (unsigned long long) this_bh->b_blocknr); | ||
4210 | } | ||
4211 | } | ||
4212 | |||
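The run accumulation in ext4_free_data() above is a small state machine: extend the current run while pointers stay physically contiguous, flush it otherwise, so one free call touches at most a couple of bitmap blocks. A userspace sketch with invented block numbers (zeros are holes):

#include <stdio.h>
#include <stdint.h>

/* Stand-in for ext4_clear_blocks(): just report the run. */
static void clear_blocks(uint32_t start, unsigned long count)
{
	printf("free %lu block(s) starting at %u\n", count, (unsigned)start);
}

/* Same accumulation as ext4_free_data(), minus journalling and
 * endianness handling. */
static void free_data(const uint32_t *first, const uint32_t *last)
{
	uint32_t block_to_free = 0;
	unsigned long count = 0;

	for (const uint32_t *p = first; p < last; p++) {
		if (!*p)
			continue;		/* a hole */
		if (count == 0) {
			block_to_free = *p;
			count = 1;
		} else if (*p == block_to_free + count) {
			count++;		/* run continues */
		} else {
			clear_blocks(block_to_free, count);
			block_to_free = *p;
			count = 1;
		}
	}
	if (count > 0)
		clear_blocks(block_to_free, count);	/* flush the tail */
}

int main(void)
{
	/* 200,201,202 form one run; 0 is a hole; 500 stands alone. */
	uint32_t map[] = { 200, 201, 202, 0, 500 };

	free_data(map, map + 5);
	return 0;
}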
4213 | /** | ||
4214 | * ext4_free_branches - free an array of branches | ||
4215 | * @handle: JBD handle for this transaction | ||
4216 | * @inode: inode we are dealing with | ||
4217 | * @parent_bh: the buffer_head which contains *@first and *@last | ||
4218 | * @first: array of block numbers | ||
4219 | * @last: pointer immediately past the end of array | ||
4220 | * @depth: depth of the branches to free | ||
4221 | * | ||
4222 | * We are freeing all blocks referred from these branches (numbers are | ||
4223 | * stored as little-endian 32-bit) and updating @inode->i_blocks | ||
4224 | * appropriately. | ||
4225 | */ | ||
4226 | static void ext4_free_branches(handle_t *handle, struct inode *inode, | ||
4227 | struct buffer_head *parent_bh, | ||
4228 | __le32 *first, __le32 *last, int depth) | ||
4229 | { | ||
4230 | ext4_fsblk_t nr; | ||
4231 | __le32 *p; | ||
4232 | |||
4233 | if (ext4_handle_is_aborted(handle)) | ||
4234 | return; | ||
4235 | |||
4236 | if (depth--) { | ||
4237 | struct buffer_head *bh; | ||
4238 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4239 | p = last; | ||
4240 | while (--p >= first) { | ||
4241 | nr = le32_to_cpu(*p); | ||
4242 | if (!nr) | ||
4243 | continue; /* A hole */ | ||
4244 | |||
4245 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
4246 | nr, 1)) { | ||
4247 | EXT4_ERROR_INODE(inode, | ||
4248 | "invalid indirect mapped " | ||
4249 | "block %lu (level %d)", | ||
4250 | (unsigned long) nr, depth); | ||
4251 | break; | ||
4252 | } | ||
4253 | |||
4254 | /* Go read the buffer for the next level down */ | ||
4255 | bh = sb_bread(inode->i_sb, nr); | ||
4256 | |||
4257 | /* | ||
4258 | * A read failure? Report the error and skip this slot | ||
4259 | * (should be rare). | ||
4260 | */ | ||
4261 | if (!bh) { | ||
4262 | EXT4_ERROR_INODE_BLOCK(inode, nr, | ||
4263 | "Read failure"); | ||
4264 | continue; | ||
4265 | } | ||
4266 | |||
4267 | /* This zaps the entire block. Bottom up. */ | ||
4268 | BUFFER_TRACE(bh, "free child branches"); | ||
4269 | ext4_free_branches(handle, inode, bh, | ||
4270 | (__le32 *) bh->b_data, | ||
4271 | (__le32 *) bh->b_data + addr_per_block, | ||
4272 | depth); | ||
4273 | brelse(bh); | ||
4274 | |||
4275 | /* | ||
4276 | * Everything below this pointer has been | ||
4277 | * released. Now let this top-of-subtree go. | ||
4278 | * | ||
4279 | * We want the freeing of this indirect block to be | ||
4280 | * atomic in the journal with the updating of the | ||
4281 | * bitmap block which owns it. So make some room in | ||
4282 | * the journal. | ||
4283 | * | ||
4284 | * We zero the parent pointer *after* freeing its | ||
4285 | * pointee in the bitmaps, so if extend_transaction() | ||
4286 | * for some reason fails to put the bitmap changes and | ||
4287 | * the release into the same transaction, recovery | ||
4288 | * will merely complain about releasing a free block, | ||
4289 | * rather than leaking blocks. | ||
4290 | */ | ||
4291 | if (ext4_handle_is_aborted(handle)) | ||
4292 | return; | ||
4293 | if (try_to_extend_transaction(handle, inode)) { | ||
4294 | ext4_mark_inode_dirty(handle, inode); | ||
4295 | ext4_truncate_restart_trans(handle, inode, | ||
4296 | ext4_blocks_for_truncate(inode)); | ||
4297 | } | ||
4298 | |||
4299 | /* | ||
4300 | * The forget flag here is critical because if | ||
4301 | * we are journaling (and not doing data | ||
4302 | * journaling), we have to make sure a revoke | ||
4303 | * record is written to prevent the journal | ||
4304 | * replay from overwriting the (former) | ||
4305 | * indirect block if it gets reallocated as a | ||
4306 | * data block. This must happen in the same | ||
4307 | * transaction where the data blocks are | ||
4308 | * actually freed. | ||
4309 | */ | ||
4310 | ext4_free_blocks(handle, inode, NULL, nr, 1, | ||
4311 | EXT4_FREE_BLOCKS_METADATA| | ||
4312 | EXT4_FREE_BLOCKS_FORGET); | ||
4313 | |||
4314 | if (parent_bh) { | ||
4315 | /* | ||
4316 | * The block which we have just freed is | ||
4317 | * pointed to by an indirect block: journal it | ||
4318 | */ | ||
4319 | BUFFER_TRACE(parent_bh, "get_write_access"); | ||
4320 | if (!ext4_journal_get_write_access(handle, | ||
4321 | parent_bh)) { | ||
4322 | *p = 0; | ||
4323 | BUFFER_TRACE(parent_bh, | ||
4324 | "call ext4_handle_dirty_metadata"); | ||
4325 | ext4_handle_dirty_metadata(handle, | ||
4326 | inode, | ||
4327 | parent_bh); | ||
4328 | } | ||
4329 | } | ||
4330 | } | ||
4331 | } else { | ||
4332 | /* We have reached the bottom of the tree. */ | ||
4333 | BUFFER_TRACE(parent_bh, "free data blocks"); | ||
4334 | ext4_free_data(handle, inode, parent_bh, first, last); | ||
4335 | } | ||
4336 | } | ||
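
The recursion above is a post-order, depth-first walk: everything a pointer
leads to is released before the pointer itself is cleared, so a crash
mid-truncate can at worst leave already-freed blocks still referenced (which
recovery merely complains about), never live blocks freed. Below is a sketch
of just that ordering, assuming a made-up in-memory node type and free_block()
helper; the real code reads each child from disk with sb_bread() and layers
journaling, revoke records, and transaction restarts on top:

    #include <stdio.h>

    #define ADDR_PER_BLOCK 4  /* tiny for the example; 1024 with 4K blocks */

    struct blk {
            unsigned long nr;                  /* "disk" block number */
            struct blk *child[ADDR_PER_BLOCK]; /* NULL entries are holes */
    };

    static void free_block(unsigned long nr)
    {
            printf("free block %lu\n", nr);
    }

    /* Post-order: free the subtree below b, then b itself. */
    static void free_branches(struct blk *b, int depth)
    {
            int i;

            if (!b)
                    return;                    /* a hole */
            if (depth--)
                    for (i = 0; i < ADDR_PER_BLOCK; i++)
                            free_branches(b->child[i], depth);
            free_block(b->nr);
    }

    int main(void)
    {
            struct blk d1 = { 11, { 0 } }, d2 = { 12, { 0 } };
            struct blk ind = { 5, { &d1, &d2, NULL, NULL } };

            free_branches(&ind, 1);            /* frees 11, 12, then 5 */
            return 0;
    }

One simplification to keep in mind: here every node frees itself after its
children, whereas in ext4_free_branches() the depth == 0 level is handed to
ext4_free_data() and each indirect block is freed by the recursion level
above it.
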
4337 | |||
4338 | int ext4_can_truncate(struct inode *inode) | 3030 | int ext4_can_truncate(struct inode *inode) |
4339 | { | 3031 | { |
4340 | if (S_ISREG(inode->i_mode)) | 3032 | if (S_ISREG(inode->i_mode)) |
@@ -4419,161 +3111,6 @@ void ext4_truncate(struct inode *inode) | |||
4419 | trace_ext4_truncate_exit(inode); | 3111 | trace_ext4_truncate_exit(inode); |
4420 | } | 3112 | } |
4421 | 3113 | ||
4422 | void ext4_ind_truncate(struct inode *inode) | ||
4423 | { | ||
4424 | handle_t *handle; | ||
4425 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
4426 | __le32 *i_data = ei->i_data; | ||
4427 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4428 | struct address_space *mapping = inode->i_mapping; | ||
4429 | ext4_lblk_t offsets[4]; | ||
4430 | Indirect chain[4]; | ||
4431 | Indirect *partial; | ||
4432 | __le32 nr = 0; | ||
4433 | int n = 0; | ||
4434 | ext4_lblk_t last_block, max_block; | ||
4435 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
4436 | |||
4437 | handle = start_transaction(inode); | ||
4438 | if (IS_ERR(handle)) | ||
4439 | return; /* AKPM: return what? */ | ||
4440 | |||
4441 | last_block = (inode->i_size + blocksize-1) | ||
4442 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4443 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
4444 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4445 | |||
4446 | if (inode->i_size & (blocksize - 1)) | ||
4447 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | ||
4448 | goto out_stop; | ||
4449 | |||
4450 | if (last_block != max_block) { | ||
4451 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | ||
4452 | if (n == 0) | ||
4453 | goto out_stop; /* error */ | ||
4454 | } | ||
4455 | |||
4456 | /* | ||
4457 | * OK. This truncate is going to happen. We add the inode to the | ||
4458 | * orphan list, so that if this truncate spans multiple transactions, | ||
4459 | * and we crash, we will resume the truncate when the filesystem | ||
4460 | * recovers. It also marks the inode dirty, to catch the new size. | ||
4461 | * | ||
4462 | * Implication: the file must always be in a sane, consistent | ||
4463 | * truncatable state while each transaction commits. | ||
4464 | */ | ||
4465 | if (ext4_orphan_add(handle, inode)) | ||
4466 | goto out_stop; | ||
4467 | |||
4468 | /* | ||
4469 | * From here we block out all ext4_get_block() callers who want to | ||
4470 | * modify the block allocation tree. | ||
4471 | */ | ||
4472 | down_write(&ei->i_data_sem); | ||
4473 | |||
4474 | ext4_discard_preallocations(inode); | ||
4475 | |||
4476 | /* | ||
4477 | * The orphan list entry will now protect us from any crash which | ||
4478 | * occurs before the truncate completes, so it is now safe to propagate | ||
4479 | * the new, shorter inode size (held for now in i_size) into the | ||
4480 | * on-disk inode. We do this via i_disksize, which is the value which | ||
4481 | * ext4 *really* writes onto the disk inode. | ||
4482 | */ | ||
4483 | ei->i_disksize = inode->i_size; | ||
4484 | |||
4485 | if (last_block == max_block) { | ||
4486 | /* | ||
4487 | * It is unnecessary to free any data blocks if last_block is | ||
4488 | * equal to the indirect block limit. | ||
4489 | */ | ||
4490 | goto out_unlock; | ||
4491 | } else if (n == 1) { /* direct blocks */ | ||
4492 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | ||
4493 | i_data + EXT4_NDIR_BLOCKS); | ||
4494 | goto do_indirects; | ||
4495 | } | ||
4496 | |||
4497 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | ||
4498 | /* Kill the top of shared branch (not detached) */ | ||
4499 | if (nr) { | ||
4500 | if (partial == chain) { | ||
4501 | /* Shared branch grows from the inode */ | ||
4502 | ext4_free_branches(handle, inode, NULL, | ||
4503 | &nr, &nr+1, (chain+n-1) - partial); | ||
4504 | *partial->p = 0; | ||
4505 | /* | ||
4506 | * We mark the inode dirty prior to restart, | ||
4507 | * and prior to stop. No need for it here. | ||
4508 | */ | ||
4509 | } else { | ||
4510 | /* Shared branch grows from an indirect block */ | ||
4511 | BUFFER_TRACE(partial->bh, "get_write_access"); | ||
4512 | ext4_free_branches(handle, inode, partial->bh, | ||
4513 | partial->p, | ||
4514 | partial->p+1, (chain+n-1) - partial); | ||
4515 | } | ||
4516 | } | ||
4517 | /* Clear the ends of indirect blocks on the shared branch */ | ||
4518 | while (partial > chain) { | ||
4519 | ext4_free_branches(handle, inode, partial->bh, partial->p + 1, | ||
4520 | (__le32*)partial->bh->b_data+addr_per_block, | ||
4521 | (chain+n-1) - partial); | ||
4522 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
4523 | brelse(partial->bh); | ||
4524 | partial--; | ||
4525 | } | ||
4526 | do_indirects: | ||
4527 | /* Kill the remaining (whole) subtrees */ | ||
4528 | switch (offsets[0]) { | ||
4529 | default: | ||
4530 | nr = i_data[EXT4_IND_BLOCK]; | ||
4531 | if (nr) { | ||
4532 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | ||
4533 | i_data[EXT4_IND_BLOCK] = 0; | ||
4534 | } | ||
4535 | case EXT4_IND_BLOCK: | ||
4536 | nr = i_data[EXT4_DIND_BLOCK]; | ||
4537 | if (nr) { | ||
4538 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | ||
4539 | i_data[EXT4_DIND_BLOCK] = 0; | ||
4540 | } | ||
4541 | case EXT4_DIND_BLOCK: | ||
4542 | nr = i_data[EXT4_TIND_BLOCK]; | ||
4543 | if (nr) { | ||
4544 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | ||
4545 | i_data[EXT4_TIND_BLOCK] = 0; | ||
4546 | } | ||
4547 | case EXT4_TIND_BLOCK: | ||
4548 | ; | ||
4549 | } | ||
4550 | |||
4551 | out_unlock: | ||
4552 | up_write(&ei->i_data_sem); | ||
4553 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4554 | ext4_mark_inode_dirty(handle, inode); | ||
4555 | |||
4556 | /* | ||
4557 | * In a multi-transaction truncate, we only make the final transaction | ||
4558 | * synchronous | ||
4559 | */ | ||
4560 | if (IS_SYNC(inode)) | ||
4561 | ext4_handle_sync(handle); | ||
4562 | out_stop: | ||
4563 | /* | ||
4564 | * If this was a simple ftruncate(), and the file will remain alive | ||
4565 | * then we need to clear up the orphan record which we created above. | ||
4566 | * However, if this was a real unlink then we were called by | ||
4567 | * ext4_delete_inode(), and we allow that function to clean up the | ||
4568 | * orphan info for us. | ||
4569 | */ | ||
4570 | if (inode->i_nlink) | ||
4571 | ext4_orphan_del(handle, inode); | ||
4572 | |||
4573 | ext4_journal_stop(handle); | ||
4574 | trace_ext4_truncate_exit(inode); | ||
4575 | } | ||
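
Two details of ext4_ind_truncate() are easy to misread. First, the last_block
and max_block computations are round-up divisions of byte counts into block
counts. Second, the do_indirects switch relies on deliberate fall-through: a
truncate point in the direct blocks (the default label) also kills the
single-, double-, and triple-indirect trees; a truncate point under
EXT4_IND_BLOCK kills only the double and triple ones; and so on down. A small
sketch of the arithmetic, assuming 4K blocks and an illustrative i_size (not
values from this patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long i_size = 10000; /* hypothetical new size */
            unsigned blocksize = 4096;
            unsigned bits = 12;                /* log2(blocksize) */

            /* Same round-up as ext4_ind_truncate(): the first block
             * index lying entirely beyond the new i_size. */
            unsigned long last_block =
                    (unsigned long)((i_size + blocksize - 1) >> bits);

            printf("last_block = %lu\n", last_block); /* prints 3 */
            return 0;
    }

Blocks at indices >= last_block are the ones freed; when last_block already
equals max_block (the most the indirect scheme can map), the function takes
the out_unlock shortcut because there is nothing beyond the mappable range
to free.
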
4576 | |||
4577 | /* | 3114 | /* |
4578 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3115 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
4579 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3116 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
@@ -5386,29 +3923,6 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5386 | return 0; | 3923 | return 0; |
5387 | } | 3924 | } |
5388 | 3925 | ||
5389 | static int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
5390 | { | ||
5391 | int indirects; | ||
5392 | |||
5393 | /* if nrblocks are contiguous */ | ||
5394 | if (chunk) { | ||
5395 | /* | ||
5396 | * With N contiguous data blocks, we need at most | ||
5397 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
5398 | * 2 dindirect blocks, and 1 tindirect block | ||
5399 | */ | ||
5400 | return DIV_ROUND_UP(nrblocks, | ||
5401 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
5402 | } | ||
5403 | /* | ||
5404 | * If nrblocks is not contiguous, worst case each block touches | ||
5405 | * an indirect block, and each indirect block touches a double | ||
5406 | * indirect block, plus a triple indirect block | ||
5407 | */ | ||
5408 | indirects = nrblocks * 2 + 1; | ||
5409 | return indirects; | ||
5410 | } | ||
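
A worked example, under the common assumption of 4K blocks with 4-byte
entries so that EXT4_ADDR_PER_BLOCK comes to 1024 (assumed here, not stated
in this diff): 2048 contiguous blocks cost DIV_ROUND_UP(2048, 1024) + 4 = 6
metadata blocks, while 8 scattered blocks are charged the worst case of
8 * 2 + 1 = 17. A standalone sketch of the same estimate:

    #include <stdio.h>

    #define ADDR_PER_BLOCK 1024 /* assumed: 4K block / 4-byte entries */

    /* Userspace mirror of the estimate above, for illustration only. */
    static int ind_trans_blocks(int nrblocks, int chunk)
    {
            if (chunk) /* contiguous: N/1024 + 1 ind + 2 dind + 1 tind */
                    return (nrblocks + ADDR_PER_BLOCK - 1)
                                    / ADDR_PER_BLOCK + 4;
            /* discontiguous: an ind + dind per block, plus one tind */
            return nrblocks * 2 + 1;
    }

    int main(void)
    {
            printf("2048 contiguous -> %d\n", ind_trans_blocks(2048, 1));
            printf("8 scattered     -> %d\n", ind_trans_blocks(8, 0));
            return 0;
    }
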
5411 | |||
5412 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 3926 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5413 | { | 3927 | { |
5414 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 3928 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |