diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 258 |
1 files changed, 141 insertions, 117 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 42272d67955a..4b8debeb3965 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
167 | /* | 167 | /* |
168 | * Called at the last iput() if i_nlink is zero. | 168 | * Called at the last iput() if i_nlink is zero. |
169 | */ | 169 | */ |
170 | void ext4_delete_inode(struct inode *inode) | 170 | void ext4_evict_inode(struct inode *inode) |
171 | { | 171 | { |
172 | handle_t *handle; | 172 | handle_t *handle; |
173 | int err; | 173 | int err; |
174 | 174 | ||
175 | if (inode->i_nlink) { | ||
176 | truncate_inode_pages(&inode->i_data, 0); | ||
177 | goto no_delete; | ||
178 | } | ||
179 | |||
175 | if (!is_bad_inode(inode)) | 180 | if (!is_bad_inode(inode)) |
176 | dquot_initialize(inode); | 181 | dquot_initialize(inode); |
177 | 182 | ||
@@ -221,6 +226,7 @@ void ext4_delete_inode(struct inode *inode) | |||
221 | "couldn't extend journal (err %d)", err); | 226 | "couldn't extend journal (err %d)", err); |
222 | stop_handle: | 227 | stop_handle: |
223 | ext4_journal_stop(handle); | 228 | ext4_journal_stop(handle); |
229 | ext4_orphan_del(NULL, inode); | ||
224 | goto no_delete; | 230 | goto no_delete; |
225 | } | 231 | } |
226 | } | 232 | } |
@@ -245,13 +251,13 @@ void ext4_delete_inode(struct inode *inode) | |||
245 | */ | 251 | */ |
246 | if (ext4_mark_inode_dirty(handle, inode)) | 252 | if (ext4_mark_inode_dirty(handle, inode)) |
247 | /* If that failed, just do the required in-core inode clear. */ | 253 | /* If that failed, just do the required in-core inode clear. */ |
248 | clear_inode(inode); | 254 | ext4_clear_inode(inode); |
249 | else | 255 | else |
250 | ext4_free_inode(handle, inode); | 256 | ext4_free_inode(handle, inode); |
251 | ext4_journal_stop(handle); | 257 | ext4_journal_stop(handle); |
252 | return; | 258 | return; |
253 | no_delete: | 259 | no_delete: |
254 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 260 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
255 | } | 261 | } |
256 | 262 | ||
257 | typedef struct { | 263 | typedef struct { |
@@ -337,9 +343,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
337 | return n; | 343 | return n; |
338 | } | 344 | } |
339 | 345 | ||
340 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 346 | static int __ext4_check_blockref(const char *function, unsigned int line, |
347 | struct inode *inode, | ||
341 | __le32 *p, unsigned int max) | 348 | __le32 *p, unsigned int max) |
342 | { | 349 | { |
350 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
343 | __le32 *bref = p; | 351 | __le32 *bref = p; |
344 | unsigned int blk; | 352 | unsigned int blk; |
345 | 353 | ||
@@ -348,8 +356,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
348 | if (blk && | 356 | if (blk && |
349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 357 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
350 | blk, 1))) { | 358 | blk, 1))) { |
351 | ext4_error_inode(function, inode, | 359 | es->s_last_error_block = cpu_to_le64(blk); |
352 | "invalid block reference %u", blk); | 360 | ext4_error_inode(inode, function, line, blk, |
361 | "invalid block"); | ||
353 | return -EIO; | 362 | return -EIO; |
354 | } | 363 | } |
355 | } | 364 | } |
@@ -358,11 +367,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
358 | 367 | ||
359 | 368 | ||
360 | #define ext4_check_indirect_blockref(inode, bh) \ | 369 | #define ext4_check_indirect_blockref(inode, bh) \ |
361 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ | 370 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
371 | (__le32 *)(bh)->b_data, \ | ||
362 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | 372 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) |
363 | 373 | ||
364 | #define ext4_check_inode_blockref(inode) \ | 374 | #define ext4_check_inode_blockref(inode) \ |
365 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ | 375 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
376 | EXT4_I(inode)->i_data, \ | ||
366 | EXT4_NDIR_BLOCKS) | 377 | EXT4_NDIR_BLOCKS) |
367 | 378 | ||
368 | /** | 379 | /** |
@@ -1128,20 +1139,24 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1128 | ext4_discard_preallocations(inode); | 1139 | ext4_discard_preallocations(inode); |
1129 | } | 1140 | } |
1130 | 1141 | ||
1131 | static int check_block_validity(struct inode *inode, const char *func, | 1142 | static int __check_block_validity(struct inode *inode, const char *func, |
1143 | unsigned int line, | ||
1132 | struct ext4_map_blocks *map) | 1144 | struct ext4_map_blocks *map) |
1133 | { | 1145 | { |
1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, | 1146 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1135 | map->m_len)) { | 1147 | map->m_len)) { |
1136 | ext4_error_inode(func, inode, | 1148 | ext4_error_inode(inode, func, line, map->m_pblk, |
1137 | "lblock %lu mapped to illegal pblock %llu " | 1149 | "lblock %lu mapped to illegal pblock " |
1138 | "(length %d)", (unsigned long) map->m_lblk, | 1150 | "(length %d)", (unsigned long) map->m_lblk, |
1139 | map->m_pblk, map->m_len); | 1151 | map->m_len); |
1140 | return -EIO; | 1152 | return -EIO; |
1141 | } | 1153 | } |
1142 | return 0; | 1154 | return 0; |
1143 | } | 1155 | } |
1144 | 1156 | ||
1157 | #define check_block_validity(inode, map) \ | ||
1158 | __check_block_validity((inode), __func__, __LINE__, (map)) | ||
1159 | |||
1145 | /* | 1160 | /* |
1146 | * Return the number of contiguous dirty pages in a given inode | 1161 | * Return the number of contiguous dirty pages in a given inode |
1147 | * starting at page frame idx. | 1162 | * starting at page frame idx. |
@@ -1244,7 +1259,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1244 | up_read((&EXT4_I(inode)->i_data_sem)); | 1259 | up_read((&EXT4_I(inode)->i_data_sem)); |
1245 | 1260 | ||
1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1261 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1247 | int ret = check_block_validity(inode, __func__, map); | 1262 | int ret = check_block_validity(inode, map); |
1248 | if (ret != 0) | 1263 | if (ret != 0) |
1249 | return ret; | 1264 | return ret; |
1250 | } | 1265 | } |
@@ -1324,9 +1339,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1324 | 1339 | ||
1325 | up_write((&EXT4_I(inode)->i_data_sem)); | 1340 | up_write((&EXT4_I(inode)->i_data_sem)); |
1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1341 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1327 | int ret = check_block_validity(inode, | 1342 | int ret = check_block_validity(inode, map); |
1328 | "ext4_map_blocks_after_alloc", | ||
1329 | map); | ||
1330 | if (ret != 0) | 1343 | if (ret != 0) |
1331 | return ret; | 1344 | return ret; |
1332 | } | 1345 | } |
@@ -1519,9 +1532,25 @@ static int walk_page_buffers(handle_t *handle, | |||
1519 | static int do_journal_get_write_access(handle_t *handle, | 1532 | static int do_journal_get_write_access(handle_t *handle, |
1520 | struct buffer_head *bh) | 1533 | struct buffer_head *bh) |
1521 | { | 1534 | { |
1535 | int dirty = buffer_dirty(bh); | ||
1536 | int ret; | ||
1537 | |||
1522 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1523 | return 0; | 1539 | return 0; |
1524 | return ext4_journal_get_write_access(handle, bh); | 1540 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | ||
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | ||
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | ||
1544 | * by __block_prepare_write() isn't a real problem here as we clear | ||
1545 | * the bit before releasing a page lock and thus writeback cannot | ||
1546 | * ever write the buffer. | ||
1547 | */ | ||
1548 | if (dirty) | ||
1549 | clear_buffer_dirty(bh); | ||
1550 | ret = ext4_journal_get_write_access(handle, bh); | ||
1551 | if (!ret && dirty) | ||
1552 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
1553 | return ret; | ||
1525 | } | 1554 | } |
1526 | 1555 | ||
1527 | /* | 1556 | /* |
@@ -1578,11 +1607,9 @@ retry: | |||
1578 | *pagep = page; | 1607 | *pagep = page; |
1579 | 1608 | ||
1580 | if (ext4_should_dioread_nolock(inode)) | 1609 | if (ext4_should_dioread_nolock(inode)) |
1581 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1610 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
1582 | fsdata, ext4_get_block_write); | ||
1583 | else | 1611 | else |
1584 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1612 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
1585 | fsdata, ext4_get_block); | ||
1586 | 1613 | ||
1587 | if (!ret && ext4_should_journal_data(inode)) { | 1614 | if (!ret && ext4_should_journal_data(inode)) { |
1588 | ret = walk_page_buffers(handle, page_buffers(page), | 1615 | ret = walk_page_buffers(handle, page_buffers(page), |
@@ -1593,7 +1620,7 @@ retry: | |||
1593 | unlock_page(page); | 1620 | unlock_page(page); |
1594 | page_cache_release(page); | 1621 | page_cache_release(page); |
1595 | /* | 1622 | /* |
1596 | * block_write_begin may have instantiated a few blocks | 1623 | * __block_write_begin may have instantiated a few blocks |
1597 | * outside i_size. Trim these off again. Don't need | 1624 | * outside i_size. Trim these off again. Don't need |
1598 | * i_size_read because we hold i_mutex. | 1625 | * i_size_read because we hold i_mutex. |
1599 | * | 1626 | * |
@@ -2194,7 +2221,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2194 | BUG_ON(!handle); | 2221 | BUG_ON(!handle); |
2195 | 2222 | ||
2196 | /* | 2223 | /* |
2197 | * Call ext4_get_blocks() to allocate any delayed allocation | 2224 | * Call ext4_map_blocks() to allocate any delayed allocation |
2198 | * blocks, or to convert an uninitialized extent to be | 2225 | * blocks, or to convert an uninitialized extent to be |
2199 | * initialized (in the case where we have written into | 2226 | * initialized (in the case where we have written into |
2200 | * one or more preallocated blocks). | 2227 | * one or more preallocated blocks). |
@@ -2203,7 +2230,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2203 | * indicate that we are on the delayed allocation path. This | 2230 | * indicate that we are on the delayed allocation path. This |
2204 | * affects functions in many different parts of the allocation | 2231 | * affects functions in many different parts of the allocation |
2205 | * call path. This flag exists primarily because we don't | 2232 | * call path. This flag exists primarily because we don't |
2206 | * want to change *many* call functions, so ext4_get_blocks() | 2233 | * want to change *many* call functions, so ext4_map_blocks() |
2207 | * will set the magic i_delalloc_reserved_flag once the | 2234 | * will set the magic i_delalloc_reserved_flag once the |
2208 | * inode's allocation semaphore is taken. | 2235 | * inode's allocation semaphore is taken. |
2209 | * | 2236 | * |
@@ -2221,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2221 | 2248 | ||
2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | 2249 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2223 | if (blks < 0) { | 2250 | if (blks < 0) { |
2251 | struct super_block *sb = mpd->inode->i_sb; | ||
2252 | |||
2224 | err = blks; | 2253 | err = blks; |
2225 | /* | 2254 | /* |
2226 | * If get block returns with error we simply | 2255 | * If get block returns with error we simply |
@@ -2231,7 +2260,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2231 | return 0; | 2260 | return 0; |
2232 | 2261 | ||
2233 | if (err == -ENOSPC && | 2262 | if (err == -ENOSPC && |
2234 | ext4_count_free_blocks(mpd->inode->i_sb)) { | 2263 | ext4_count_free_blocks(sb)) { |
2235 | mpd->retval = err; | 2264 | mpd->retval = err; |
2236 | return 0; | 2265 | return 0; |
2237 | } | 2266 | } |
@@ -2243,16 +2272,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2243 | * writepage and writepages will again try to write | 2272 | * writepage and writepages will again try to write |
2244 | * the same. | 2273 | * the same. |
2245 | */ | 2274 | */ |
2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2275 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { |
2247 | "delayed block allocation failed for inode %lu at " | 2276 | ext4_msg(sb, KERN_CRIT, |
2248 | "logical offset %llu with max blocks %zd with " | 2277 | "delayed block allocation failed for inode %lu " |
2249 | "error %d", mpd->inode->i_ino, | 2278 | "at logical offset %llu with max blocks %zd " |
2250 | (unsigned long long) next, | 2279 | "with error %d", mpd->inode->i_ino, |
2251 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2280 | (unsigned long long) next, |
2252 | printk(KERN_CRIT "This should not happen!! " | 2281 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2253 | "Data will be lost\n"); | 2282 | ext4_msg(sb, KERN_CRIT, |
2254 | if (err == -ENOSPC) { | 2283 | "This should not happen!! Data will be lost\n"); |
2255 | ext4_print_free_blocks(mpd->inode); | 2284 | if (err == -ENOSPC) |
2285 | ext4_print_free_blocks(mpd->inode); | ||
2256 | } | 2286 | } |
2257 | /* invalidate all the pages */ | 2287 | /* invalidate all the pages */ |
2258 | ext4_da_block_invalidatepages(mpd, next, | 2288 | ext4_da_block_invalidatepages(mpd, next, |
@@ -2320,7 +2350,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2320 | * XXX Don't go larger than mballoc is willing to allocate | 2350 | * XXX Don't go larger than mballoc is willing to allocate |
2321 | * This is a stopgap solution. We eventually need to fold | 2351 | * This is a stopgap solution. We eventually need to fold |
2322 | * mpage_da_submit_io() into this function and then call | 2352 | * mpage_da_submit_io() into this function and then call |
2323 | * ext4_get_blocks() multiple times in a loop | 2353 | * ext4_map_blocks() multiple times in a loop |
2324 | */ | 2354 | */ |
2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | 2355 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) |
2326 | goto flush_it; | 2356 | goto flush_it; |
@@ -2553,18 +2583,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2553 | /* | 2583 | /* |
2554 | * This function is used as a standard get_block_t calback function | 2584 | * This function is used as a standard get_block_t calback function |
2555 | * when there is no desire to allocate any blocks. It is used as a | 2585 | * when there is no desire to allocate any blocks. It is used as a |
2556 | * callback function for block_prepare_write(), nobh_writepage(), and | 2586 | * callback function for block_prepare_write() and block_write_full_page(). |
2557 | * block_write_full_page(). These functions should only try to map a | 2587 | * These functions should only try to map a single block at a time. |
2558 | * single block at a time. | ||
2559 | * | 2588 | * |
2560 | * Since this function doesn't do block allocations even if the caller | 2589 | * Since this function doesn't do block allocations even if the caller |
2561 | * requests it by passing in create=1, it is critically important that | 2590 | * requests it by passing in create=1, it is critically important that |
2562 | * any caller checks to make sure that any buffer heads are returned | 2591 | * any caller checks to make sure that any buffer heads are returned |
2563 | * by this function are either all already mapped or marked for | 2592 | * by this function are either all already mapped or marked for |
2564 | * delayed allocation before calling nobh_writepage() or | 2593 | * delayed allocation before calling block_write_full_page(). Otherwise, |
2565 | * block_write_full_page(). Otherwise, b_blocknr could be left | 2594 | * b_blocknr could be left unitialized, and the page write functions will |
2566 | * unitialized, and the page write functions will be taken by | 2595 | * be taken by surprise. |
2567 | * surprise. | ||
2568 | */ | 2596 | */ |
2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2597 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2570 | struct buffer_head *bh_result, int create) | 2598 | struct buffer_head *bh_result, int create) |
@@ -2749,9 +2777,7 @@ static int ext4_writepage(struct page *page, | |||
2749 | return __ext4_journalled_writepage(page, len); | 2777 | return __ext4_journalled_writepage(page, len); |
2750 | } | 2778 | } |
2751 | 2779 | ||
2752 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2780 | if (page_bufs && buffer_uninit(page_bufs)) { |
2753 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | ||
2754 | else if (page_bufs && buffer_uninit(page_bufs)) { | ||
2755 | ext4_set_bh_endio(page_bufs, inode); | 2781 | ext4_set_bh_endio(page_bufs, inode); |
2756 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2757 | wbc, ext4_end_io_buffer_write); | 2783 | wbc, ext4_end_io_buffer_write); |
@@ -3146,13 +3172,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3146 | int ret, retries = 0; | 3172 | int ret, retries = 0; |
3147 | struct page *page; | 3173 | struct page *page; |
3148 | pgoff_t index; | 3174 | pgoff_t index; |
3149 | unsigned from, to; | ||
3150 | struct inode *inode = mapping->host; | 3175 | struct inode *inode = mapping->host; |
3151 | handle_t *handle; | 3176 | handle_t *handle; |
3152 | 3177 | ||
3153 | index = pos >> PAGE_CACHE_SHIFT; | 3178 | index = pos >> PAGE_CACHE_SHIFT; |
3154 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
3155 | to = from + len; | ||
3156 | 3179 | ||
3157 | if (ext4_nonda_switch(inode->i_sb)) { | 3180 | if (ext4_nonda_switch(inode->i_sb)) { |
3158 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 3181 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
@@ -3185,8 +3208,7 @@ retry: | |||
3185 | } | 3208 | } |
3186 | *pagep = page; | 3209 | *pagep = page; |
3187 | 3210 | ||
3188 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 3211 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
3189 | ext4_da_get_block_prep); | ||
3190 | if (ret < 0) { | 3212 | if (ret < 0) { |
3191 | unlock_page(page); | 3213 | unlock_page(page); |
3192 | ext4_journal_stop(handle); | 3214 | ext4_journal_stop(handle); |
@@ -3545,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
3545 | 3567 | ||
3546 | retry: | 3568 | retry: |
3547 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 3569 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3548 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 3570 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3549 | inode->i_sb->s_bdev, iov, | 3571 | inode->i_sb->s_bdev, iov, |
3550 | offset, nr_segs, | 3572 | offset, nr_segs, |
3551 | ext4_get_block, NULL); | 3573 | ext4_get_block, NULL, NULL, 0); |
3552 | else | 3574 | else { |
3553 | ret = blockdev_direct_IO(rw, iocb, inode, | 3575 | ret = blockdev_direct_IO(rw, iocb, inode, |
3554 | inode->i_sb->s_bdev, iov, | 3576 | inode->i_sb->s_bdev, iov, |
3555 | offset, nr_segs, | 3577 | offset, nr_segs, |
3556 | ext4_get_block, NULL); | 3578 | ext4_get_block, NULL); |
3579 | |||
3580 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
3581 | loff_t isize = i_size_read(inode); | ||
3582 | loff_t end = offset + iov_length(iov, nr_segs); | ||
3583 | |||
3584 | if (end > isize) | ||
3585 | vmtruncate(inode, isize); | ||
3586 | } | ||
3587 | } | ||
3557 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3588 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3558 | goto retry; | 3589 | goto retry; |
3559 | 3590 | ||
@@ -3668,6 +3699,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) | |||
3668 | return ret; | 3699 | return ret; |
3669 | } | 3700 | } |
3670 | 3701 | ||
3702 | if (io->iocb) | ||
3703 | aio_complete(io->iocb, io->result, 0); | ||
3671 | /* clear the DIO AIO unwritten flag */ | 3704 | /* clear the DIO AIO unwritten flag */ |
3672 | io->flag = 0; | 3705 | io->flag = 0; |
3673 | return ret; | 3706 | return ret; |
@@ -3767,6 +3800,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | |||
3767 | io->offset = 0; | 3800 | io->offset = 0; |
3768 | io->size = 0; | 3801 | io->size = 0; |
3769 | io->page = NULL; | 3802 | io->page = NULL; |
3803 | io->iocb = NULL; | ||
3804 | io->result = 0; | ||
3770 | INIT_WORK(&io->work, ext4_end_io_work); | 3805 | INIT_WORK(&io->work, ext4_end_io_work); |
3771 | INIT_LIST_HEAD(&io->list); | 3806 | INIT_LIST_HEAD(&io->list); |
3772 | } | 3807 | } |
@@ -3775,7 +3810,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | |||
3775 | } | 3810 | } |
3776 | 3811 | ||
3777 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3812 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3778 | ssize_t size, void *private) | 3813 | ssize_t size, void *private, int ret, |
3814 | bool is_async) | ||
3779 | { | 3815 | { |
3780 | ext4_io_end_t *io_end = iocb->private; | 3816 | ext4_io_end_t *io_end = iocb->private; |
3781 | struct workqueue_struct *wq; | 3817 | struct workqueue_struct *wq; |
@@ -3784,7 +3820,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3784 | 3820 | ||
3785 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3821 | /* if not async direct IO or dio with 0 bytes write, just return */ |
3786 | if (!io_end || !size) | 3822 | if (!io_end || !size) |
3787 | return; | 3823 | goto out; |
3788 | 3824 | ||
3789 | ext_debug("ext4_end_io_dio(): io_end 0x%p" | 3825 | ext_debug("ext4_end_io_dio(): io_end 0x%p" |
3790 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 3826 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", |
@@ -3795,12 +3831,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3795 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
3796 | ext4_free_io_end(io_end); | 3832 | ext4_free_io_end(io_end); |
3797 | iocb->private = NULL; | 3833 | iocb->private = NULL; |
3834 | out: | ||
3835 | if (is_async) | ||
3836 | aio_complete(iocb, ret, 0); | ||
3798 | return; | 3837 | return; |
3799 | } | 3838 | } |
3800 | 3839 | ||
3801 | io_end->offset = offset; | 3840 | io_end->offset = offset; |
3802 | io_end->size = size; | 3841 | io_end->size = size; |
3803 | io_end->flag = EXT4_IO_UNWRITTEN; | 3842 | if (is_async) { |
3843 | io_end->iocb = iocb; | ||
3844 | io_end->result = ret; | ||
3845 | } | ||
3804 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3805 | 3847 | ||
3806 | /* queue the work to convert unwritten extents to written */ | 3848 | /* queue the work to convert unwritten extents to written */ |
@@ -3937,7 +3979,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3937 | return -ENOMEM; | 3979 | return -ENOMEM; |
3938 | /* | 3980 | /* |
3939 | * we save the io structure for current async | 3981 | * we save the io structure for current async |
3940 | * direct IO, so that later ext4_get_blocks() | 3982 | * direct IO, so that later ext4_map_blocks() |
3941 | * could flag the io structure whether there | 3983 | * could flag the io structure whether there |
3942 | * is a unwritten extents needs to be converted | 3984 | * is a unwritten extents needs to be converted |
3943 | * when IO is completed. | 3985 | * when IO is completed. |
@@ -4128,17 +4170,6 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4128 | length = blocksize - (offset & (blocksize - 1)); | 4170 | length = blocksize - (offset & (blocksize - 1)); |
4129 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 4171 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
4130 | 4172 | ||
4131 | /* | ||
4132 | * For "nobh" option, we can only work if we don't need to | ||
4133 | * read-in the page - otherwise we create buffers to do the IO. | ||
4134 | */ | ||
4135 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | ||
4136 | ext4_should_writeback_data(inode) && PageUptodate(page)) { | ||
4137 | zero_user(page, offset, length); | ||
4138 | set_page_dirty(page); | ||
4139 | goto unlock; | ||
4140 | } | ||
4141 | |||
4142 | if (!page_has_buffers(page)) | 4173 | if (!page_has_buffers(page)) |
4143 | create_empty_buffers(page, blocksize, 0); | 4174 | create_empty_buffers(page, blocksize, 0); |
4144 | 4175 | ||
@@ -4488,9 +4519,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4488 | * (should be rare). | 4519 | * (should be rare). |
4489 | */ | 4520 | */ |
4490 | if (!bh) { | 4521 | if (!bh) { |
4491 | EXT4_ERROR_INODE(inode, | 4522 | EXT4_ERROR_INODE_BLOCK(inode, nr, |
4492 | "Read failure block=%llu", | 4523 | "Read failure"); |
4493 | (unsigned long long) nr); | ||
4494 | continue; | 4524 | continue; |
4495 | } | 4525 | } |
4496 | 4526 | ||
@@ -4502,27 +4532,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4502 | depth); | 4532 | depth); |
4503 | 4533 | ||
4504 | /* | 4534 | /* |
4505 | * We've probably journalled the indirect block several | ||
4506 | * times during the truncate. But it's no longer | ||
4507 | * needed and we now drop it from the transaction via | ||
4508 | * jbd2_journal_revoke(). | ||
4509 | * | ||
4510 | * That's easy if it's exclusively part of this | ||
4511 | * transaction. But if it's part of the committing | ||
4512 | * transaction then jbd2_journal_forget() will simply | ||
4513 | * brelse() it. That means that if the underlying | ||
4514 | * block is reallocated in ext4_get_block(), | ||
4515 | * unmap_underlying_metadata() will find this block | ||
4516 | * and will try to get rid of it. damn, damn. | ||
4517 | * | ||
4518 | * If this block has already been committed to the | ||
4519 | * journal, a revoke record will be written. And | ||
4520 | * revoke records must be emitted *before* clearing | ||
4521 | * this block's bit in the bitmaps. | ||
4522 | */ | ||
4523 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
4524 | |||
4525 | /* | ||
4526 | * Everything below this this pointer has been | 4535 | * Everything below this this pointer has been |
4527 | * released. Now let this top-of-subtree go. | 4536 | * released. Now let this top-of-subtree go. |
4528 | * | 4537 | * |
@@ -4546,8 +4555,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4546 | blocks_for_truncate(inode)); | 4555 | blocks_for_truncate(inode)); |
4547 | } | 4556 | } |
4548 | 4557 | ||
4558 | /* | ||
4559 | * The forget flag here is critical because if | ||
4560 | * we are journaling (and not doing data | ||
4561 | * journaling), we have to make sure a revoke | ||
4562 | * record is written to prevent the journal | ||
4563 | * replay from overwriting the (former) | ||
4564 | * indirect block if it gets reallocated as a | ||
4565 | * data block. This must happen in the same | ||
4566 | * transaction where the data blocks are | ||
4567 | * actually freed. | ||
4568 | */ | ||
4549 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4569 | ext4_free_blocks(handle, inode, 0, nr, 1, |
4550 | EXT4_FREE_BLOCKS_METADATA); | 4570 | EXT4_FREE_BLOCKS_METADATA| |
4571 | EXT4_FREE_BLOCKS_FORGET); | ||
4551 | 4572 | ||
4552 | if (parent_bh) { | 4573 | if (parent_bh) { |
4553 | /* | 4574 | /* |
@@ -4805,8 +4826,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4805 | 4826 | ||
4806 | bh = sb_getblk(sb, block); | 4827 | bh = sb_getblk(sb, block); |
4807 | if (!bh) { | 4828 | if (!bh) { |
4808 | EXT4_ERROR_INODE(inode, "unable to read inode block - " | 4829 | EXT4_ERROR_INODE_BLOCK(inode, block, |
4809 | "block %llu", block); | 4830 | "unable to read itable block"); |
4810 | return -EIO; | 4831 | return -EIO; |
4811 | } | 4832 | } |
4812 | if (!buffer_uptodate(bh)) { | 4833 | if (!buffer_uptodate(bh)) { |
@@ -4904,8 +4925,8 @@ make_io: | |||
4904 | submit_bh(READ_META, bh); | 4925 | submit_bh(READ_META, bh); |
4905 | wait_on_buffer(bh); | 4926 | wait_on_buffer(bh); |
4906 | if (!buffer_uptodate(bh)) { | 4927 | if (!buffer_uptodate(bh)) { |
4907 | EXT4_ERROR_INODE(inode, "unable to read inode " | 4928 | EXT4_ERROR_INODE_BLOCK(inode, block, |
4908 | "block %llu", block); | 4929 | "unable to read itable block"); |
4909 | brelse(bh); | 4930 | brelse(bh); |
4910 | return -EIO; | 4931 | return -EIO; |
4911 | } | 4932 | } |
@@ -4976,7 +4997,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | |||
4976 | /* we are using combined 48 bit field */ | 4997 | /* we are using combined 48 bit field */ |
4977 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | 4998 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | |
4978 | le32_to_cpu(raw_inode->i_blocks_lo); | 4999 | le32_to_cpu(raw_inode->i_blocks_lo); |
4979 | if (ei->i_flags & EXT4_HUGE_FILE_FL) { | 5000 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { |
4980 | /* i_blocks represent file system block size */ | 5001 | /* i_blocks represent file system block size */ |
4981 | return i_blocks << (inode->i_blkbits - 9); | 5002 | return i_blocks << (inode->i_blkbits - 9); |
4982 | } else { | 5003 | } else { |
@@ -5072,7 +5093,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5072 | transaction_t *transaction; | 5093 | transaction_t *transaction; |
5073 | tid_t tid; | 5094 | tid_t tid; |
5074 | 5095 | ||
5075 | spin_lock(&journal->j_state_lock); | 5096 | read_lock(&journal->j_state_lock); |
5076 | if (journal->j_running_transaction) | 5097 | if (journal->j_running_transaction) |
5077 | transaction = journal->j_running_transaction; | 5098 | transaction = journal->j_running_transaction; |
5078 | else | 5099 | else |
@@ -5081,7 +5102,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5081 | tid = transaction->t_tid; | 5102 | tid = transaction->t_tid; |
5082 | else | 5103 | else |
5083 | tid = journal->j_commit_sequence; | 5104 | tid = journal->j_commit_sequence; |
5084 | spin_unlock(&journal->j_state_lock); | 5105 | read_unlock(&journal->j_state_lock); |
5085 | ei->i_sync_tid = tid; | 5106 | ei->i_sync_tid = tid; |
5086 | ei->i_datasync_tid = tid; | 5107 | ei->i_datasync_tid = tid; |
5087 | } | 5108 | } |
@@ -5126,7 +5147,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5126 | ei->i_file_acl); | 5147 | ei->i_file_acl); |
5127 | ret = -EIO; | 5148 | ret = -EIO; |
5128 | goto bad_inode; | 5149 | goto bad_inode; |
5129 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 5150 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
5130 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 5151 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
5131 | (S_ISLNK(inode->i_mode) && | 5152 | (S_ISLNK(inode->i_mode) && |
5132 | !ext4_inode_is_fast_symlink(inode))) | 5153 | !ext4_inode_is_fast_symlink(inode))) |
@@ -5406,9 +5427,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5406 | if (wbc->sync_mode == WB_SYNC_ALL) | 5427 | if (wbc->sync_mode == WB_SYNC_ALL) |
5407 | sync_dirty_buffer(iloc.bh); | 5428 | sync_dirty_buffer(iloc.bh); |
5408 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5429 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5409 | EXT4_ERROR_INODE(inode, | 5430 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
5410 | "IO error syncing inode (block=%llu)", | 5431 | "IO error syncing inode"); |
5411 | (unsigned long long) iloc.bh->b_blocknr); | ||
5412 | err = -EIO; | 5432 | err = -EIO; |
5413 | } | 5433 | } |
5414 | brelse(iloc.bh); | 5434 | brelse(iloc.bh); |
@@ -5483,10 +5503,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5483 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 5503 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5484 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5504 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5485 | 5505 | ||
5486 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5506 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
5487 | error = -EFBIG; | 5507 | return -EFBIG; |
5488 | goto err_out; | ||
5489 | } | ||
5490 | } | 5508 | } |
5491 | } | 5509 | } |
5492 | 5510 | ||
@@ -5529,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5529 | ext4_truncate(inode); | 5547 | ext4_truncate(inode); |
5530 | } | 5548 | } |
5531 | 5549 | ||
5532 | rc = inode_setattr(inode, attr); | 5550 | if ((attr->ia_valid & ATTR_SIZE) && |
5551 | attr->ia_size != i_size_read(inode)) | ||
5552 | rc = vmtruncate(inode, attr->ia_size); | ||
5553 | |||
5554 | if (!rc) { | ||
5555 | setattr_copy(inode, attr); | ||
5556 | mark_inode_dirty(inode); | ||
5557 | } | ||
5533 | 5558 | ||
5534 | /* If inode_setattr's call to ext4_truncate failed to get a | 5559 | /* |
5535 | * transaction handle at all, we need to clean up the in-core | 5560 | * If the call to ext4_truncate failed to get a transaction handle at |
5536 | * orphan list manually. */ | 5561 | * all, we need to clean up the in-core orphan list manually. |
5562 | */ | ||
5537 | if (inode->i_nlink) | 5563 | if (inode->i_nlink) |
5538 | ext4_orphan_del(NULL, inode); | 5564 | ext4_orphan_del(NULL, inode); |
5539 | 5565 | ||
@@ -5688,7 +5714,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
5688 | * Calculate the journal credits for a chunk of data modification. | 5714 | * Calculate the journal credits for a chunk of data modification. |
5689 | * | 5715 | * |
5690 | * This is called from DIO, fallocate or whoever calling | 5716 | * This is called from DIO, fallocate or whoever calling |
5691 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. | 5717 | * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. |
5692 | * | 5718 | * |
5693 | * journal buffers for data blocks are not included here, as DIO | 5719 | * journal buffers for data blocks are not included here, as DIO |
5694 | * and fallocate do no need to journal data buffers. | 5720 | * and fallocate do no need to journal data buffers. |
@@ -5754,7 +5780,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5754 | { | 5780 | { |
5755 | struct ext4_inode *raw_inode; | 5781 | struct ext4_inode *raw_inode; |
5756 | struct ext4_xattr_ibody_header *header; | 5782 | struct ext4_xattr_ibody_header *header; |
5757 | struct ext4_xattr_entry *entry; | ||
5758 | 5783 | ||
5759 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) | 5784 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
5760 | return 0; | 5785 | return 0; |
@@ -5762,7 +5787,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5762 | raw_inode = ext4_raw_inode(&iloc); | 5787 | raw_inode = ext4_raw_inode(&iloc); |
5763 | 5788 | ||
5764 | header = IHDR(inode, raw_inode); | 5789 | header = IHDR(inode, raw_inode); |
5765 | entry = IFIRST(header); | ||
5766 | 5790 | ||
5767 | /* No extended attributes present */ | 5791 | /* No extended attributes present */ |
5768 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 5792 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |