Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	258
1 file changed, 141 insertions(+), 117 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d67955a..4b8debeb3965 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext4_delete_inode(struct inode *inode)
+void ext4_evict_inode(struct inode *inode)
 {
 	handle_t *handle;
 	int err;
 
+	if (inode->i_nlink) {
+		truncate_inode_pages(&inode->i_data, 0);
+		goto no_delete;
+	}
+
 	if (!is_bad_inode(inode))
 		dquot_initialize(inode);
 
@@ -221,6 +226,7 @@ void ext4_delete_inode(struct inode *inode)
 				     "couldn't extend journal (err %d)", err);
 		stop_handle:
 			ext4_journal_stop(handle);
+			ext4_orphan_del(NULL, inode);
 			goto no_delete;
 		}
 	}
@@ -245,13 +251,13 @@ void ext4_delete_inode(struct inode *inode)
  */
 	if (ext4_mark_inode_dirty(handle, inode))
 		/* If that failed, just do the required in-core inode clear. */
-		clear_inode(inode);
+		ext4_clear_inode(inode);
 	else
 		ext4_free_inode(handle, inode);
 	ext4_journal_stop(handle);
 	return;
 no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
 typedef struct {
@@ -337,9 +343,11 @@ static int ext4_block_to_path(struct inode *inode,
 	return n;
 }
 
-static int __ext4_check_blockref(const char *function, struct inode *inode,
+static int __ext4_check_blockref(const char *function, unsigned int line,
+				 struct inode *inode,
 				 __le32 *p, unsigned int max)
 {
+	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 	__le32 *bref = p;
 	unsigned int blk;
 
@@ -348,8 +356,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 		if (blk &&
 		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 						    blk, 1))) {
-			ext4_error_inode(function, inode,
-					 "invalid block reference %u", blk);
+			es->s_last_error_block = cpu_to_le64(blk);
+			ext4_error_inode(inode, function, line, blk,
+					 "invalid block");
 			return -EIO;
 		}
 	}
@@ -358,11 +367,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 
 
 #define ext4_check_indirect_blockref(inode, bh)				\
-	__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data,	\
+	__ext4_check_blockref(__func__, __LINE__, inode,		\
+			      (__le32 *)(bh)->b_data,			\
 			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))
 
 #define ext4_check_inode_blockref(inode)				\
-	__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data,	\
+	__ext4_check_blockref(__func__, __LINE__, inode,		\
+			      EXT4_I(inode)->i_data,			\
 			      EXT4_NDIR_BLOCKS)
 
 /**
@@ -1128,20 +1139,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
 		ext4_discard_preallocations(inode);
 }
 
-static int check_block_validity(struct inode *inode, const char *func,
+static int __check_block_validity(struct inode *inode, const char *func,
+				unsigned int line,
 				struct ext4_map_blocks *map)
 {
 	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
 				   map->m_len)) {
-		ext4_error_inode(func, inode,
-			   "lblock %lu mapped to illegal pblock %llu "
-			   "(length %d)", (unsigned long) map->m_lblk,
-			   map->m_pblk, map->m_len);
+		ext4_error_inode(inode, func, line, map->m_pblk,
+				 "lblock %lu mapped to illegal pblock "
+				 "(length %d)", (unsigned long) map->m_lblk,
+				 map->m_len);
 		return -EIO;
 	}
 	return 0;
 }
 
+#define check_block_validity(inode, map)	\
+	__check_block_validity((inode), __func__, __LINE__, (map))
+
 /*
  * Return the number of contiguous dirty pages in a given inode
  * starting at page frame idx.
@@ -1244,7 +1259,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	up_read((&EXT4_I(inode)->i_data_sem));
 
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-		int ret = check_block_validity(inode, __func__, map);
+		int ret = check_block_validity(inode, map);
 		if (ret != 0)
 			return ret;
 	}
@@ -1324,9 +1339,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-		int ret = check_block_validity(inode,
-					       "ext4_map_blocks_after_alloc",
-					       map);
+		int ret = check_block_validity(inode, map);
 		if (ret != 0)
 			return ret;
 	}
@@ -1519,9 +1532,25 @@ static int walk_page_buffers(handle_t *handle,
 static int do_journal_get_write_access(handle_t *handle,
 				       struct buffer_head *bh)
 {
+	int dirty = buffer_dirty(bh);
+	int ret;
+
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
-	return ext4_journal_get_write_access(handle, bh);
+	/*
+	 * __block_prepare_write() could have dirtied some buffers. Clean
+	 * the dirty bit as jbd2_journal_get_write_access() could complain
+	 * otherwise about fs integrity issues. Setting of the dirty bit
+	 * by __block_prepare_write() isn't a real problem here as we clear
+	 * the bit before releasing a page lock and thus writeback cannot
+	 * ever write the buffer.
+	 */
+	if (dirty)
+		clear_buffer_dirty(bh);
+	ret = ext4_journal_get_write_access(handle, bh);
+	if (!ret && dirty)
+		ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+	return ret;
 }
 
 /*
@@ -1578,11 +1607,9 @@ retry:
 	*pagep = page;
 
 	if (ext4_should_dioread_nolock(inode))
-		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, ext4_get_block_write);
+		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
 	else
-		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, ext4_get_block);
+		ret = __block_write_begin(page, pos, len, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1593,7 +1620,7 @@ retry:
 		unlock_page(page);
 		page_cache_release(page);
 		/*
-		 * block_write_begin may have instantiated a few blocks
+		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
 		 * i_size_read because we hold i_mutex.
 		 *
@@ -2194,7 +2221,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	BUG_ON(!handle);
 
 	/*
-	 * Call ext4_get_blocks() to allocate any delayed allocation
+	 * Call ext4_map_blocks() to allocate any delayed allocation
 	 * blocks, or to convert an uninitialized extent to be
 	 * initialized (in the case where we have written into
 	 * one or more preallocated blocks).
@@ -2203,7 +2230,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * indicate that we are on the delayed allocation path.  This
 	 * affects functions in many different parts of the allocation
 	 * call path.  This flag exists primarily because we don't
-	 * want to change *many* call functions, so ext4_get_blocks()
+	 * want to change *many* call functions, so ext4_map_blocks()
 	 * will set the magic i_delalloc_reserved_flag once the
 	 * inode's allocation semaphore is taken.
 	 *
@@ -2221,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 
 	blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
 	if (blks < 0) {
+		struct super_block *sb = mpd->inode->i_sb;
+
 		err = blks;
 		/*
 		 * If get block returns with error we simply
@@ -2231,7 +2260,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 			return 0;
 
 		if (err == -ENOSPC &&
-		    ext4_count_free_blocks(mpd->inode->i_sb)) {
+		    ext4_count_free_blocks(sb)) {
 			mpd->retval = err;
 			return 0;
 		}
@@ -2243,16 +2272,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		 * writepage and writepages will again try to write
 		 * the same.
 		 */
-		ext4_msg(mpd->inode->i_sb, KERN_CRIT,
-			 "delayed block allocation failed for inode %lu at "
-			 "logical offset %llu with max blocks %zd with "
-			 "error %d", mpd->inode->i_ino,
-			 (unsigned long long) next,
-			 mpd->b_size >> mpd->inode->i_blkbits, err);
-		printk(KERN_CRIT "This should not happen!! "
-		       "Data will be lost\n");
-		if (err == -ENOSPC) {
-			ext4_print_free_blocks(mpd->inode);
+		if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+			ext4_msg(sb, KERN_CRIT,
+				 "delayed block allocation failed for inode %lu "
+				 "at logical offset %llu with max blocks %zd "
+				 "with error %d", mpd->inode->i_ino,
+				 (unsigned long long) next,
+				 mpd->b_size >> mpd->inode->i_blkbits, err);
+			ext4_msg(sb, KERN_CRIT,
+				"This should not happen!! Data will be lost\n");
+			if (err == -ENOSPC)
+				ext4_print_free_blocks(mpd->inode);
 		}
 		/* invalidate all the pages */
 		ext4_da_block_invalidatepages(mpd, next,
@@ -2320,7 +2350,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 * XXX Don't go larger than mballoc is willing to allocate
 	 * This is a stopgap solution.  We eventually need to fold
 	 * mpage_da_submit_io() into this function and then call
-	 * ext4_get_blocks() multiple times in a loop
+	 * ext4_map_blocks() multiple times in a loop
 	 */
 	if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
 		goto flush_it;
@@ -2553,18 +2583,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 /*
  * This function is used as a standard get_block_t calback function
  * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_prepare_write(), nobh_writepage(), and
- * block_write_full_page().  These functions should only try to map a
- * single block at a time.
+ * callback function for block_prepare_write() and block_write_full_page().
+ * These functions should only try to map a single block at a time.
  *
  * Since this function doesn't do block allocations even if the caller
  * requests it by passing in create=1, it is critically important that
  * any caller checks to make sure that any buffer heads are returned
  * by this function are either all already mapped or marked for
- * delayed allocation before calling nobh_writepage() or
- * block_write_full_page().  Otherwise, b_blocknr could be left
- * unitialized, and the page write functions will be taken by
- * surprise.
+ * delayed allocation before calling block_write_full_page().  Otherwise,
+ * b_blocknr could be left unitialized, and the page write functions will
+ * be taken by surprise.
  */
 static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
@@ -2749,9 +2777,7 @@ static int ext4_writepage(struct page *page,
 		return __ext4_journalled_writepage(page, len);
 	}
 
-	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_writepage(page, noalloc_get_block_write, wbc);
-	else if (page_bufs && buffer_uninit(page_bufs)) {
+	if (page_bufs && buffer_uninit(page_bufs)) {
 		ext4_set_bh_endio(page_bufs, inode);
 		ret = block_write_full_page_endio(page, noalloc_get_block_write,
 					    wbc, ext4_end_io_buffer_write);
@@ -3146,13 +3172,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 	int ret, retries = 0;
 	struct page *page;
 	pgoff_t index;
-	unsigned from, to;
 	struct inode *inode = mapping->host;
 	handle_t *handle;
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	from = pos & (PAGE_CACHE_SIZE - 1);
-	to = from + len;
 
 	if (ext4_nonda_switch(inode->i_sb)) {
 		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3185,8 +3208,7 @@ retry:
 	}
 	*pagep = page;
 
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ext4_da_get_block_prep);
+	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
@@ -3545,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 
 retry:
 	if (rw == READ && ext4_should_dioread_nolock(inode))
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+		ret = __blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
-				 ext4_get_block, NULL);
-	else
+				 ext4_get_block, NULL, NULL, 0);
+	else {
 		ret = blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext4_get_block, NULL);
+
+		if (unlikely((rw & WRITE) && ret < 0)) {
+			loff_t isize = i_size_read(inode);
+			loff_t end = offset + iov_length(iov, nr_segs);
+
+			if (end > isize)
+				vmtruncate(inode, isize);
+		}
+	}
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
@@ -3668,6 +3699,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
 		return ret;
 	}
 
+	if (io->iocb)
+		aio_complete(io->iocb, io->result, 0);
 	/* clear the DIO AIO unwritten flag */
 	io->flag = 0;
 	return ret;
@@ -3767,6 +3800,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
 		io->offset = 0;
 		io->size = 0;
 		io->page = NULL;
+		io->iocb = NULL;
+		io->result = 0;
 		INIT_WORK(&io->work, ext4_end_io_work);
 		INIT_LIST_HEAD(&io->list);
 	}
@@ -3775,7 +3810,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private)
+			    ssize_t size, void *private, int ret,
+			    bool is_async)
 {
 	ext4_io_end_t *io_end = iocb->private;
 	struct workqueue_struct *wq;
@@ -3784,7 +3820,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
 	if (!io_end || !size)
-		return;
+		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p"
 		  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,12 +3831,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	if (io_end->flag != EXT4_IO_UNWRITTEN){
 		ext4_free_io_end(io_end);
 		iocb->private = NULL;
+out:
+		if (is_async)
+			aio_complete(iocb, ret, 0);
 		return;
 	}
 
 	io_end->offset = offset;
 	io_end->size = size;
-	io_end->flag = EXT4_IO_UNWRITTEN;
+	if (is_async) {
+		io_end->iocb = iocb;
+		io_end->result = ret;
+	}
 	wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
 	/* queue the work to convert unwritten extents to written */
@@ -3937,7 +3979,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 			return -ENOMEM;
 		/*
 		 * we save the io structure for current async
-		 * direct IO, so that later ext4_get_blocks()
+		 * direct IO, so that later ext4_map_blocks()
 		 * could flag the io structure whether there
 		 * is a unwritten extents needs to be converted
 		 * when IO is completed.
@@ -4128,17 +4170,6 @@ int ext4_block_truncate_page(handle_t *handle,
 	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
-	/*
-	 * For "nobh" option, we can only work if we don't need to
-	 * read-in the page - otherwise we create buffers to do the IO.
-	 */
-	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
-	     ext4_should_writeback_data(inode) && PageUptodate(page)) {
-		zero_user(page, offset, length);
-		set_page_dirty(page);
-		goto unlock;
-	}
-
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 
@@ -4488,9 +4519,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			 * (should be rare).
 			 */
 			if (!bh) {
-				EXT4_ERROR_INODE(inode,
-						 "Read failure block=%llu",
-						 (unsigned long long) nr);
+				EXT4_ERROR_INODE_BLOCK(inode, nr,
+						       "Read failure");
 				continue;
 			}
 
@@ -4502,27 +4532,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 					depth);
 
 			/*
-			 * We've probably journalled the indirect block several
-			 * times during the truncate.  But it's no longer
-			 * needed and we now drop it from the transaction via
-			 * jbd2_journal_revoke().
-			 *
-			 * That's easy if it's exclusively part of this
-			 * transaction.  But if it's part of the committing
-			 * transaction then jbd2_journal_forget() will simply
-			 * brelse() it.  That means that if the underlying
-			 * block is reallocated in ext4_get_block(),
-			 * unmap_underlying_metadata() will find this block
-			 * and will try to get rid of it.  damn, damn.
-			 *
-			 * If this block has already been committed to the
-			 * journal, a revoke record will be written.  And
-			 * revoke records must be emitted *before* clearing
-			 * this block's bit in the bitmaps.
-			 */
-			ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
-
-			/*
 			 * Everything below this this pointer has been
 			 * released.  Now let this top-of-subtree go.
 			 *
@@ -4546,8 +4555,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 					    blocks_for_truncate(inode));
 			}
 
+			/*
+			 * The forget flag here is critical because if
+			 * we are journaling (and not doing data
+			 * journaling), we have to make sure a revoke
+			 * record is written to prevent the journal
+			 * replay from overwriting the (former)
+			 * indirect block if it gets reallocated as a
+			 * data block.  This must happen in the same
+			 * transaction where the data blocks are
+			 * actually freed.
+			 */
 			ext4_free_blocks(handle, inode, 0, nr, 1,
-					 EXT4_FREE_BLOCKS_METADATA);
+					 EXT4_FREE_BLOCKS_METADATA|
+					 EXT4_FREE_BLOCKS_FORGET);
 
 			if (parent_bh) {
 				/*
@@ -4805,8 +4826,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
 	bh = sb_getblk(sb, block);
 	if (!bh) {
-		EXT4_ERROR_INODE(inode, "unable to read inode block - "
-				 "block %llu", block);
+		EXT4_ERROR_INODE_BLOCK(inode, block,
+				       "unable to read itable block");
 		return -EIO;
 	}
 	if (!buffer_uptodate(bh)) {
@@ -4904,8 +4925,8 @@ make_io:
 		submit_bh(READ_META, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			EXT4_ERROR_INODE(inode, "unable to read inode "
-					 "block %llu", block);
+			EXT4_ERROR_INODE_BLOCK(inode, block,
+					       "unable to read itable block");
 			brelse(bh);
 			return -EIO;
 		}
@@ -4976,7 +4997,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
 		/* we are using combined 48 bit field */
 		i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
 					le32_to_cpu(raw_inode->i_blocks_lo);
-		if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+		if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
 			/* i_blocks represent file system block size */
 			return i_blocks  << (inode->i_blkbits - 9);
 		} else {
@@ -5072,7 +5093,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		transaction_t *transaction;
 		tid_t tid;
 
-		spin_lock(&journal->j_state_lock);
+		read_lock(&journal->j_state_lock);
 		if (journal->j_running_transaction)
 			transaction = journal->j_running_transaction;
 		else
@@ -5081,7 +5102,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 			tid = transaction->t_tid;
 		else
 			tid = journal->j_commit_sequence;
-		spin_unlock(&journal->j_state_lock);
+		read_unlock(&journal->j_state_lock);
 		ei->i_sync_tid = tid;
 		ei->i_datasync_tid = tid;
 	}
@@ -5126,7 +5147,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 				 ei->i_file_acl);
 		ret = -EIO;
 		goto bad_inode;
-	} else if (ei->i_flags & EXT4_EXTENTS_FL) {
+	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		    (S_ISLNK(inode->i_mode) &&
 		     !ext4_inode_is_fast_symlink(inode)))
@@ -5406,9 +5427,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 		if (wbc->sync_mode == WB_SYNC_ALL)
 			sync_dirty_buffer(iloc.bh);
 		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-			EXT4_ERROR_INODE(inode,
-				"IO error syncing inode (block=%llu)",
-				(unsigned long long) iloc.bh->b_blocknr);
+			EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+					 "IO error syncing inode");
 			err = -EIO;
 		}
 		brelse(iloc.bh);
@@ -5483,10 +5503,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
 			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
-			if (attr->ia_size > sbi->s_bitmap_maxbytes) {
-				error = -EFBIG;
-				goto err_out;
-			}
+			if (attr->ia_size > sbi->s_bitmap_maxbytes)
+				return -EFBIG;
 		}
 	}
 
@@ -5529,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		ext4_truncate(inode);
 	}
 
-	rc = inode_setattr(inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode))
+		rc = vmtruncate(inode, attr->ia_size);
+
+	if (!rc) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+	}
 
-	/* If inode_setattr's call to ext4_truncate failed to get a
-	 * transaction handle at all, we need to clean up the in-core
-	 * orphan list manually. */
+	/*
+	 * If the call to ext4_truncate failed to get a transaction handle at
+	 * all, we need to clean up the in-core orphan list manually.
+	 */
 	if (inode->i_nlink)
 		ext4_orphan_del(NULL, inode);
 
@@ -5688,7 +5714,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
+ * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do no need to journal data buffers.
@@ -5754,7 +5780,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
 {
 	struct ext4_inode *raw_inode;
 	struct ext4_xattr_ibody_header *header;
-	struct ext4_xattr_entry *entry;
 
 	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
 		return 0;
@@ -5762,7 +5787,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
 	raw_inode = ext4_raw_inode(&iloc);
 
 	header = IHDR(inode, raw_inode);
-	entry = IFIRST(header);
 
 	/* No extended attributes present */
 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||