aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c508
1 files changed, 359 insertions, 149 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9c064727ed62..b9ffa9f4191f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
37#include <linux/printk.h> 37#include <linux/printk.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/bitops.h> 39#include <linux/bitops.h>
40#include <linux/iomap.h>
40 41
41#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
42#include "xattr.h" 43#include "xattr.h"
@@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
71 csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, 72 csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
72 csum_size); 73 csum_size);
73 offset += csum_size; 74 offset += csum_size;
74 csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
75 EXT4_INODE_SIZE(inode->i_sb) -
76 offset);
77 } 75 }
76 csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
77 EXT4_INODE_SIZE(inode->i_sb) - offset);
78 } 78 }
79 79
80 return csum; 80 return csum;
@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode)
261 "couldn't mark inode dirty (err %d)", err); 261 "couldn't mark inode dirty (err %d)", err);
262 goto stop_handle; 262 goto stop_handle;
263 } 263 }
264 if (inode->i_blocks) 264 if (inode->i_blocks) {
265 ext4_truncate(inode); 265 err = ext4_truncate(inode);
266 if (err) {
267 ext4_error(inode->i_sb,
268 "couldn't truncate inode %lu (err %d)",
269 inode->i_ino, err);
270 goto stop_handle;
271 }
272 }
266 273
267 /* 274 /*
268 * ext4_ext_truncate() doesn't reserve any slop when it 275 * ext4_ext_truncate() doesn't reserve any slop when it
@@ -654,12 +661,8 @@ found:
654 if (flags & EXT4_GET_BLOCKS_ZERO && 661 if (flags & EXT4_GET_BLOCKS_ZERO &&
655 map->m_flags & EXT4_MAP_MAPPED && 662 map->m_flags & EXT4_MAP_MAPPED &&
656 map->m_flags & EXT4_MAP_NEW) { 663 map->m_flags & EXT4_MAP_NEW) {
657 ext4_lblk_t i; 664 clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
658 665 map->m_len);
659 for (i = 0; i < map->m_len; i++) {
660 unmap_underlying_metadata(inode->i_sb->s_bdev,
661 map->m_pblk + i);
662 }
663 ret = ext4_issue_zeroout(inode, map->m_lblk, 666 ret = ext4_issue_zeroout(inode, map->m_lblk,
664 map->m_pblk, map->m_len); 667 map->m_pblk, map->m_len);
665 if (ret) { 668 if (ret) {
@@ -767,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
767 ext4_update_bh_state(bh, map.m_flags); 770 ext4_update_bh_state(bh, map.m_flags);
768 bh->b_size = inode->i_sb->s_blocksize * map.m_len; 771 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
769 ret = 0; 772 ret = 0;
773 } else if (ret == 0) {
774 /* hole case, need to fill in bh->b_size */
775 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
770 } 776 }
771 return ret; 777 return ret;
772} 778}
@@ -1127,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
1127 if (err) 1133 if (err)
1128 break; 1134 break;
1129 if (buffer_new(bh)) { 1135 if (buffer_new(bh)) {
1130 unmap_underlying_metadata(bh->b_bdev, 1136 clean_bdev_bh_alias(bh);
1131 bh->b_blocknr);
1132 if (PageUptodate(page)) { 1137 if (PageUptodate(page)) {
1133 clear_buffer_new(bh); 1138 clear_buffer_new(bh);
1134 set_buffer_uptodate(bh); 1139 set_buffer_uptodate(bh);
@@ -1166,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
1166 if (unlikely(err)) 1171 if (unlikely(err))
1167 page_zero_new_buffers(page, from, to); 1172 page_zero_new_buffers(page, from, to);
1168 else if (decrypt) 1173 else if (decrypt)
1169 err = fscrypt_decrypt_page(page); 1174 err = fscrypt_decrypt_page(page->mapping->host, page,
1175 PAGE_SIZE, 0, page->index);
1170 return err; 1176 return err;
1171} 1177}
1172#endif 1178#endif
@@ -1183,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1183 pgoff_t index; 1189 pgoff_t index;
1184 unsigned from, to; 1190 unsigned from, to;
1185 1191
1192 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
1193 return -EIO;
1194
1186 trace_ext4_write_begin(inode, pos, len, flags); 1195 trace_ext4_write_begin(inode, pos, len, flags);
1187 /* 1196 /*
1188 * Reserve one block more for addition to orphan list in case 1197 * Reserve one block more for addition to orphan list in case
@@ -1324,8 +1333,11 @@ static int ext4_write_end(struct file *file,
1324 if (ext4_has_inline_data(inode)) { 1333 if (ext4_has_inline_data(inode)) {
1325 ret = ext4_write_inline_data_end(inode, pos, len, 1334 ret = ext4_write_inline_data_end(inode, pos, len,
1326 copied, page); 1335 copied, page);
1327 if (ret < 0) 1336 if (ret < 0) {
1337 unlock_page(page);
1338 put_page(page);
1328 goto errout; 1339 goto errout;
1340 }
1329 copied = ret; 1341 copied = ret;
1330 } else 1342 } else
1331 copied = block_write_end(file, mapping, pos, 1343 copied = block_write_end(file, mapping, pos,
@@ -1379,7 +1391,9 @@ errout:
1379 * set the buffer to be dirty, since in data=journalled mode we need 1391 * set the buffer to be dirty, since in data=journalled mode we need
1380 * to call ext4_handle_dirty_metadata() instead. 1392 * to call ext4_handle_dirty_metadata() instead.
1381 */ 1393 */
1382static void zero_new_buffers(struct page *page, unsigned from, unsigned to) 1394static void ext4_journalled_zero_new_buffers(handle_t *handle,
1395 struct page *page,
1396 unsigned from, unsigned to)
1383{ 1397{
1384 unsigned int block_start = 0, block_end; 1398 unsigned int block_start = 0, block_end;
1385 struct buffer_head *head, *bh; 1399 struct buffer_head *head, *bh;
@@ -1396,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
1396 size = min(to, block_end) - start; 1410 size = min(to, block_end) - start;
1397 1411
1398 zero_user(page, start, size); 1412 zero_user(page, start, size);
1399 set_buffer_uptodate(bh); 1413 write_end_fn(handle, bh);
1400 } 1414 }
1401 clear_buffer_new(bh); 1415 clear_buffer_new(bh);
1402 } 1416 }
@@ -1425,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file,
1425 1439
1426 BUG_ON(!ext4_handle_valid(handle)); 1440 BUG_ON(!ext4_handle_valid(handle));
1427 1441
1428 if (ext4_has_inline_data(inode)) 1442 if (ext4_has_inline_data(inode)) {
1429 copied = ext4_write_inline_data_end(inode, pos, len, 1443 ret = ext4_write_inline_data_end(inode, pos, len,
1430 copied, page); 1444 copied, page);
1431 else { 1445 if (ret < 0) {
1432 if (copied < len) { 1446 unlock_page(page);
1433 if (!PageUptodate(page)) 1447 put_page(page);
1434 copied = 0; 1448 goto errout;
1435 zero_new_buffers(page, from+copied, to);
1436 } 1449 }
1437 1450 copied = ret;
1451 } else if (unlikely(copied < len) && !PageUptodate(page)) {
1452 copied = 0;
1453 ext4_journalled_zero_new_buffers(handle, page, from, to);
1454 } else {
1455 if (unlikely(copied < len))
1456 ext4_journalled_zero_new_buffers(handle, page,
1457 from + copied, to);
1438 ret = ext4_walk_page_buffers(handle, page_buffers(page), from, 1458 ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
1439 to, &partial, write_end_fn); 1459 from + copied, &partial,
1460 write_end_fn);
1440 if (!partial) 1461 if (!partial)
1441 SetPageUptodate(page); 1462 SetPageUptodate(page);
1442 } 1463 }
@@ -1462,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file,
1462 */ 1483 */
1463 ext4_orphan_add(handle, inode); 1484 ext4_orphan_add(handle, inode);
1464 1485
1486errout:
1465 ret2 = ext4_journal_stop(handle); 1487 ret2 = ext4_journal_stop(handle);
1466 if (!ret) 1488 if (!ret)
1467 ret = ret2; 1489 ret = ret2;
@@ -2028,6 +2050,12 @@ static int ext4_writepage(struct page *page,
2028 struct ext4_io_submit io_submit; 2050 struct ext4_io_submit io_submit;
2029 bool keep_towrite = false; 2051 bool keep_towrite = false;
2030 2052
2053 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
2054 ext4_invalidatepage(page, 0, PAGE_SIZE);
2055 unlock_page(page);
2056 return -EIO;
2057 }
2058
2031 trace_ext4_writepage(page); 2059 trace_ext4_writepage(page);
2032 size = i_size_read(inode); 2060 size = i_size_read(inode);
2033 if (page->index == size >> PAGE_SHIFT) 2061 if (page->index == size >> PAGE_SHIFT)
@@ -2193,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
2193{ 2221{
2194 struct inode *inode = mpd->inode; 2222 struct inode *inode = mpd->inode;
2195 int err; 2223 int err;
2196 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) 2224 ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
2197 >> inode->i_blkbits; 2225 >> inode->i_blkbits;
2198 2226
2199 do { 2227 do {
@@ -2360,11 +2388,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2360 2388
2361 BUG_ON(map->m_len == 0); 2389 BUG_ON(map->m_len == 0);
2362 if (map->m_flags & EXT4_MAP_NEW) { 2390 if (map->m_flags & EXT4_MAP_NEW) {
2363 struct block_device *bdev = inode->i_sb->s_bdev; 2391 clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
2364 int i; 2392 map->m_len);
2365
2366 for (i = 0; i < map->m_len; i++)
2367 unmap_underlying_metadata(bdev, map->m_pblk + i);
2368 } 2393 }
2369 return 0; 2394 return 0;
2370} 2395}
@@ -2406,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2406 if (err < 0) { 2431 if (err < 0) {
2407 struct super_block *sb = inode->i_sb; 2432 struct super_block *sb = inode->i_sb;
2408 2433
2409 if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) 2434 if (ext4_forced_shutdown(EXT4_SB(sb)) ||
2435 EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
2410 goto invalidate_dirty_pages; 2436 goto invalidate_dirty_pages;
2411 /* 2437 /*
2412 * Let the uper layers retry transient errors. 2438 * Let the uper layers retry transient errors.
@@ -2461,8 +2487,8 @@ update_disksize:
2461 disksize = i_size; 2487 disksize = i_size;
2462 if (disksize > EXT4_I(inode)->i_disksize) 2488 if (disksize > EXT4_I(inode)->i_disksize)
2463 EXT4_I(inode)->i_disksize = disksize; 2489 EXT4_I(inode)->i_disksize = disksize;
2464 err2 = ext4_mark_inode_dirty(handle, inode);
2465 up_write(&EXT4_I(inode)->i_data_sem); 2490 up_write(&EXT4_I(inode)->i_data_sem);
2491 err2 = ext4_mark_inode_dirty(handle, inode);
2466 if (err2) 2492 if (err2)
2467 ext4_error(inode->i_sb, 2493 ext4_error(inode->i_sb,
2468 "Failed to mark inode %lu dirty", 2494 "Failed to mark inode %lu dirty",
@@ -2628,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping,
2628 struct blk_plug plug; 2654 struct blk_plug plug;
2629 bool give_up_on_write = false; 2655 bool give_up_on_write = false;
2630 2656
2657 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
2658 return -EIO;
2659
2631 percpu_down_read(&sbi->s_journal_flag_rwsem); 2660 percpu_down_read(&sbi->s_journal_flag_rwsem);
2632 trace_ext4_writepages(inode, wbc); 2661 trace_ext4_writepages(inode, wbc);
2633 2662
@@ -2664,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping,
2664 * *never* be called, so if that ever happens, we would want 2693 * *never* be called, so if that ever happens, we would want
2665 * the stack trace. 2694 * the stack trace.
2666 */ 2695 */
2667 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { 2696 if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
2697 sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
2668 ret = -EROFS; 2698 ret = -EROFS;
2669 goto out_writepages; 2699 goto out_writepages;
2670 } 2700 }
@@ -2889,9 +2919,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2889 struct inode *inode = mapping->host; 2919 struct inode *inode = mapping->host;
2890 handle_t *handle; 2920 handle_t *handle;
2891 2921
2922 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
2923 return -EIO;
2924
2892 index = pos >> PAGE_SHIFT; 2925 index = pos >> PAGE_SHIFT;
2893 2926
2894 if (ext4_nonda_switch(inode->i_sb)) { 2927 if (ext4_nonda_switch(inode->i_sb) ||
2928 S_ISLNK(inode->i_mode)) {
2895 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 2929 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2896 return ext4_write_begin(file, mapping, pos, 2930 return ext4_write_begin(file, mapping, pos,
2897 len, flags, pagep, fsdata); 2931 len, flags, pagep, fsdata);
@@ -3268,53 +3302,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3268} 3302}
3269 3303
3270#ifdef CONFIG_FS_DAX 3304#ifdef CONFIG_FS_DAX
3271/* 3305static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
3272 * Get block function for DAX IO and mmap faults. It takes care of converting 3306 unsigned flags, struct iomap *iomap)
3273 * unwritten extents to written ones and initializes new / converted blocks
3274 * to zeros.
3275 */
3276int ext4_dax_get_block(struct inode *inode, sector_t iblock,
3277 struct buffer_head *bh_result, int create)
3278{ 3307{
3308 unsigned int blkbits = inode->i_blkbits;
3309 unsigned long first_block = offset >> blkbits;
3310 unsigned long last_block = (offset + length - 1) >> blkbits;
3311 struct ext4_map_blocks map;
3279 int ret; 3312 int ret;
3280 3313
3281 ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create); 3314 if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
3282 if (!create) 3315 return -ERANGE;
3283 return _ext4_get_block(inode, iblock, bh_result, 0);
3284 3316
3285 ret = ext4_get_block_trans(inode, iblock, bh_result, 3317 map.m_lblk = first_block;
3286 EXT4_GET_BLOCKS_PRE_IO | 3318 map.m_len = last_block - first_block + 1;
3287 EXT4_GET_BLOCKS_CREATE_ZERO); 3319
3288 if (ret < 0) 3320 if (!(flags & IOMAP_WRITE)) {
3289 return ret; 3321 ret = ext4_map_blocks(NULL, inode, &map, 0);
3322 } else {
3323 int dio_credits;
3324 handle_t *handle;
3325 int retries = 0;
3290 3326
3291 if (buffer_unwritten(bh_result)) { 3327 /* Trim mapping request to maximum we can map at once for DIO */
3328 if (map.m_len > DIO_MAX_BLOCKS)
3329 map.m_len = DIO_MAX_BLOCKS;
3330 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
3331retry:
3292 /* 3332 /*
3293 * We are protected by i_mmap_sem or i_mutex so we know block 3333 * Either we allocate blocks and then we don't get unwritten
3294 * cannot go away from under us even though we dropped 3334 * extent so we have reserved enough credits, or the blocks
3295 * i_data_sem. Convert extent to written and write zeros there. 3335 * are already allocated and unwritten and in that case
3336 * extent conversion fits in the credits as well.
3296 */ 3337 */
3297 ret = ext4_get_block_trans(inode, iblock, bh_result, 3338 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
3298 EXT4_GET_BLOCKS_CONVERT | 3339 dio_credits);
3299 EXT4_GET_BLOCKS_CREATE_ZERO); 3340 if (IS_ERR(handle))
3300 if (ret < 0) 3341 return PTR_ERR(handle);
3342
3343 ret = ext4_map_blocks(handle, inode, &map,
3344 EXT4_GET_BLOCKS_CREATE_ZERO);
3345 if (ret < 0) {
3346 ext4_journal_stop(handle);
3347 if (ret == -ENOSPC &&
3348 ext4_should_retry_alloc(inode->i_sb, &retries))
3349 goto retry;
3301 return ret; 3350 return ret;
3351 }
3352
3353 /*
3354 * If we added blocks beyond i_size, we need to make sure they
3355 * will get truncated if we crash before updating i_size in
3356 * ext4_iomap_end(). For faults we don't need to do that (and
3357 * even cannot because for orphan list operations inode_lock is
3358 * required) - if we happen to instantiate block beyond i_size,
3359 * it is because we race with truncate which has already added
3360 * the inode to the orphan list.
3361 */
3362 if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
3363 (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
3364 int err;
3365
3366 err = ext4_orphan_add(handle, inode);
3367 if (err < 0) {
3368 ext4_journal_stop(handle);
3369 return err;
3370 }
3371 }
3372 ext4_journal_stop(handle);
3302 } 3373 }
3303 /* 3374
3304 * At least for now we have to clear BH_New so that DAX code 3375 iomap->flags = 0;
3305 * doesn't attempt to zero blocks again in a racy way. 3376 iomap->bdev = inode->i_sb->s_bdev;
3306 */ 3377 iomap->offset = first_block << blkbits;
3307 clear_buffer_new(bh_result); 3378
3379 if (ret == 0) {
3380 iomap->type = IOMAP_HOLE;
3381 iomap->blkno = IOMAP_NULL_BLOCK;
3382 iomap->length = (u64)map.m_len << blkbits;
3383 } else {
3384 if (map.m_flags & EXT4_MAP_MAPPED) {
3385 iomap->type = IOMAP_MAPPED;
3386 } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
3387 iomap->type = IOMAP_UNWRITTEN;
3388 } else {
3389 WARN_ON_ONCE(1);
3390 return -EIO;
3391 }
3392 iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
3393 iomap->length = (u64)map.m_len << blkbits;
3394 }
3395
3396 if (map.m_flags & EXT4_MAP_NEW)
3397 iomap->flags |= IOMAP_F_NEW;
3308 return 0; 3398 return 0;
3309} 3399}
3310#else 3400
3311/* Just define empty function, it will never get called. */ 3401static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
3312int ext4_dax_get_block(struct inode *inode, sector_t iblock, 3402 ssize_t written, unsigned flags, struct iomap *iomap)
3313 struct buffer_head *bh_result, int create)
3314{ 3403{
3315 BUG(); 3404 int ret = 0;
3316 return 0; 3405 handle_t *handle;
3406 int blkbits = inode->i_blkbits;
3407 bool truncate = false;
3408
3409 if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
3410 return 0;
3411
3412 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3413 if (IS_ERR(handle)) {
3414 ret = PTR_ERR(handle);
3415 goto orphan_del;
3416 }
3417 if (ext4_update_inode_size(inode, offset + written))
3418 ext4_mark_inode_dirty(handle, inode);
3419 /*
3420 * We may need to truncate allocated but not written blocks beyond EOF.
3421 */
3422 if (iomap->offset + iomap->length >
3423 ALIGN(inode->i_size, 1 << blkbits)) {
3424 ext4_lblk_t written_blk, end_blk;
3425
3426 written_blk = (offset + written) >> blkbits;
3427 end_blk = (offset + length) >> blkbits;
3428 if (written_blk < end_blk && ext4_can_truncate(inode))
3429 truncate = true;
3430 }
3431 /*
3432 * Remove inode from orphan list if we were extending a inode and
3433 * everything went fine.
3434 */
3435 if (!truncate && inode->i_nlink &&
3436 !list_empty(&EXT4_I(inode)->i_orphan))
3437 ext4_orphan_del(handle, inode);
3438 ext4_journal_stop(handle);
3439 if (truncate) {
3440 ext4_truncate_failed_write(inode);
3441orphan_del:
3442 /*
3443 * If truncate failed early the inode might still be on the
3444 * orphan list; we need to make sure the inode is removed from
3445 * the orphan list in that case.
3446 */
3447 if (inode->i_nlink)
3448 ext4_orphan_del(NULL, inode);
3449 }
3450 return ret;
3317} 3451}
3452
3453const struct iomap_ops ext4_iomap_ops = {
3454 .iomap_begin = ext4_iomap_begin,
3455 .iomap_end = ext4_iomap_end,
3456};
3457
3318#endif 3458#endif
3319 3459
3320static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3460static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3436,20 +3576,8 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
3436 iocb->private = NULL; 3576 iocb->private = NULL;
3437 if (overwrite) 3577 if (overwrite)
3438 get_block_func = ext4_dio_get_block_overwrite; 3578 get_block_func = ext4_dio_get_block_overwrite;
3439 else if (IS_DAX(inode)) { 3579 else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
3440 /* 3580 round_down(offset, i_blocksize(inode)) >= inode->i_size) {
3441 * We can avoid zeroing for aligned DAX writes beyond EOF. Other
3442 * writes need zeroing either because they can race with page
3443 * faults or because they use partial blocks.
3444 */
3445 if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
3446 ext4_aligned_io(inode, offset, count))
3447 get_block_func = ext4_dio_get_block;
3448 else
3449 get_block_func = ext4_dax_get_block;
3450 dio_flags = DIO_LOCKING;
3451 } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
3452 round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
3453 get_block_func = ext4_dio_get_block; 3581 get_block_func = ext4_dio_get_block;
3454 dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; 3582 dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
3455 } else if (is_sync_kiocb(iocb)) { 3583 } else if (is_sync_kiocb(iocb)) {
@@ -3462,14 +3590,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
3462#ifdef CONFIG_EXT4_FS_ENCRYPTION 3590#ifdef CONFIG_EXT4_FS_ENCRYPTION
3463 BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); 3591 BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
3464#endif 3592#endif
3465 if (IS_DAX(inode)) { 3593 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
3466 ret = dax_do_io(iocb, inode, iter, get_block_func, 3594 get_block_func, ext4_end_io_dio, NULL,
3467 ext4_end_io_dio, dio_flags); 3595 dio_flags);
3468 } else
3469 ret = __blockdev_direct_IO(iocb, inode,
3470 inode->i_sb->s_bdev, iter,
3471 get_block_func,
3472 ext4_end_io_dio, NULL, dio_flags);
3473 3596
3474 if (ret > 0 && !overwrite && ext4_test_inode_state(inode, 3597 if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3475 EXT4_STATE_DIO_UNWRITTEN)) { 3598 EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3538,6 +3661,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
3538{ 3661{
3539 struct address_space *mapping = iocb->ki_filp->f_mapping; 3662 struct address_space *mapping = iocb->ki_filp->f_mapping;
3540 struct inode *inode = mapping->host; 3663 struct inode *inode = mapping->host;
3664 size_t count = iov_iter_count(iter);
3541 ssize_t ret; 3665 ssize_t ret;
3542 3666
3543 /* 3667 /*
@@ -3546,19 +3670,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
3546 * we are protected against page writeback as well. 3670 * we are protected against page writeback as well.
3547 */ 3671 */
3548 inode_lock_shared(inode); 3672 inode_lock_shared(inode);
3549 if (IS_DAX(inode)) { 3673 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
3550 ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0); 3674 iocb->ki_pos + count);
3551 } else { 3675 if (ret)
3552 size_t count = iov_iter_count(iter); 3676 goto out_unlock;
3553 3677 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
3554 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, 3678 iter, ext4_dio_get_block, NULL, NULL, 0);
3555 iocb->ki_pos + count);
3556 if (ret)
3557 goto out_unlock;
3558 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
3559 iter, ext4_dio_get_block,
3560 NULL, NULL, 0);
3561 }
3562out_unlock: 3679out_unlock:
3563 inode_unlock_shared(inode); 3680 inode_unlock_shared(inode);
3564 return ret; 3681 return ret;
@@ -3587,6 +3704,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3587 if (ext4_has_inline_data(inode)) 3704 if (ext4_has_inline_data(inode))
3588 return 0; 3705 return 0;
3589 3706
3707 /* DAX uses iomap path now */
3708 if (WARN_ON_ONCE(IS_DAX(inode)))
3709 return 0;
3710
3590 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 3711 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
3591 if (iov_iter_rw(iter) == READ) 3712 if (iov_iter_rw(iter) == READ)
3592 ret = ext4_direct_IO_read(iocb, iter); 3713 ret = ext4_direct_IO_read(iocb, iter);
@@ -3615,6 +3736,13 @@ static int ext4_journalled_set_page_dirty(struct page *page)
3615 return __set_page_dirty_nobuffers(page); 3736 return __set_page_dirty_nobuffers(page);
3616} 3737}
3617 3738
3739static int ext4_set_page_dirty(struct page *page)
3740{
3741 WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
3742 WARN_ON_ONCE(!page_has_buffers(page));
3743 return __set_page_dirty_buffers(page);
3744}
3745
3618static const struct address_space_operations ext4_aops = { 3746static const struct address_space_operations ext4_aops = {
3619 .readpage = ext4_readpage, 3747 .readpage = ext4_readpage,
3620 .readpages = ext4_readpages, 3748 .readpages = ext4_readpages,
@@ -3622,6 +3750,7 @@ static const struct address_space_operations ext4_aops = {
3622 .writepages = ext4_writepages, 3750 .writepages = ext4_writepages,
3623 .write_begin = ext4_write_begin, 3751 .write_begin = ext4_write_begin,
3624 .write_end = ext4_write_end, 3752 .write_end = ext4_write_end,
3753 .set_page_dirty = ext4_set_page_dirty,
3625 .bmap = ext4_bmap, 3754 .bmap = ext4_bmap,
3626 .invalidatepage = ext4_invalidatepage, 3755 .invalidatepage = ext4_invalidatepage,
3627 .releasepage = ext4_releasepage, 3756 .releasepage = ext4_releasepage,
@@ -3654,6 +3783,7 @@ static const struct address_space_operations ext4_da_aops = {
3654 .writepages = ext4_writepages, 3783 .writepages = ext4_writepages,
3655 .write_begin = ext4_da_write_begin, 3784 .write_begin = ext4_da_write_begin,
3656 .write_end = ext4_da_write_end, 3785 .write_end = ext4_da_write_end,
3786 .set_page_dirty = ext4_set_page_dirty,
3657 .bmap = ext4_bmap, 3787 .bmap = ext4_bmap,
3658 .invalidatepage = ext4_da_invalidatepage, 3788 .invalidatepage = ext4_da_invalidatepage,
3659 .releasepage = ext4_releasepage, 3789 .releasepage = ext4_releasepage,
@@ -3743,7 +3873,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
3743 /* We expect the key to be set. */ 3873 /* We expect the key to be set. */
3744 BUG_ON(!fscrypt_has_encryption_key(inode)); 3874 BUG_ON(!fscrypt_has_encryption_key(inode));
3745 BUG_ON(blocksize != PAGE_SIZE); 3875 BUG_ON(blocksize != PAGE_SIZE);
3746 WARN_ON_ONCE(fscrypt_decrypt_page(page)); 3876 WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
3877 page, PAGE_SIZE, 0, page->index));
3747 } 3878 }
3748 } 3879 }
3749 if (ext4_should_journal_data(inode)) { 3880 if (ext4_should_journal_data(inode)) {
@@ -3792,8 +3923,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
3792 if (length > max || length < 0) 3923 if (length > max || length < 0)
3793 length = max; 3924 length = max;
3794 3925
3795 if (IS_DAX(inode)) 3926 if (IS_DAX(inode)) {
3796 return dax_zero_page_range(inode, from, length, ext4_get_block); 3927 return iomap_zero_range(inode, from, length, NULL,
3928 &ext4_iomap_ops);
3929 }
3797 return __ext4_block_zero_page_range(handle, mapping, from, length); 3930 return __ext4_block_zero_page_range(handle, mapping, from, length);
3798} 3931}
3799 3932
@@ -3811,6 +3944,10 @@ static int ext4_block_truncate_page(handle_t *handle,
3811 unsigned blocksize; 3944 unsigned blocksize;
3812 struct inode *inode = mapping->host; 3945 struct inode *inode = mapping->host;
3813 3946
3947 /* If we are processing an encrypted inode during orphan list handling */
3948 if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode))
3949 return 0;
3950
3814 blocksize = inode->i_sb->s_blocksize; 3951 blocksize = inode->i_sb->s_blocksize;
3815 length = blocksize - (offset & (blocksize - 1)); 3952 length = blocksize - (offset & (blocksize - 1));
3816 3953
@@ -4026,7 +4163,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
4026 if (IS_SYNC(inode)) 4163 if (IS_SYNC(inode))
4027 ext4_handle_sync(handle); 4164 ext4_handle_sync(handle);
4028 4165
4029 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4166 inode->i_mtime = inode->i_ctime = current_time(inode);
4030 ext4_mark_inode_dirty(handle, inode); 4167 ext4_mark_inode_dirty(handle, inode);
4031out_stop: 4168out_stop:
4032 ext4_journal_stop(handle); 4169 ext4_journal_stop(handle);
@@ -4091,10 +4228,11 @@ int ext4_inode_attach_jinode(struct inode *inode)
4091 * that's fine - as long as they are linked from the inode, the post-crash 4228 * that's fine - as long as they are linked from the inode, the post-crash
4092 * ext4_truncate() run will find them and release them. 4229 * ext4_truncate() run will find them and release them.
4093 */ 4230 */
4094void ext4_truncate(struct inode *inode) 4231int ext4_truncate(struct inode *inode)
4095{ 4232{
4096 struct ext4_inode_info *ei = EXT4_I(inode); 4233 struct ext4_inode_info *ei = EXT4_I(inode);
4097 unsigned int credits; 4234 unsigned int credits;
4235 int err = 0;
4098 handle_t *handle; 4236 handle_t *handle;
4099 struct address_space *mapping = inode->i_mapping; 4237 struct address_space *mapping = inode->i_mapping;
4100 4238
@@ -4108,7 +4246,7 @@ void ext4_truncate(struct inode *inode)
4108 trace_ext4_truncate_enter(inode); 4246 trace_ext4_truncate_enter(inode);
4109 4247
4110 if (!ext4_can_truncate(inode)) 4248 if (!ext4_can_truncate(inode))
4111 return; 4249 return 0;
4112 4250
4113 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4251 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4114 4252
@@ -4118,15 +4256,17 @@ void ext4_truncate(struct inode *inode)
4118 if (ext4_has_inline_data(inode)) { 4256 if (ext4_has_inline_data(inode)) {
4119 int has_inline = 1; 4257 int has_inline = 1;
4120 4258
4121 ext4_inline_data_truncate(inode, &has_inline); 4259 err = ext4_inline_data_truncate(inode, &has_inline);
4260 if (err)
4261 return err;
4122 if (has_inline) 4262 if (has_inline)
4123 return; 4263 return 0;
4124 } 4264 }
4125 4265
4126 /* If we zero-out tail of the page, we have to create jinode for jbd2 */ 4266 /* If we zero-out tail of the page, we have to create jinode for jbd2 */
4127 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { 4267 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
4128 if (ext4_inode_attach_jinode(inode) < 0) 4268 if (ext4_inode_attach_jinode(inode) < 0)
4129 return; 4269 return 0;
4130 } 4270 }
4131 4271
4132 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 4272 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4135,10 +4275,8 @@ void ext4_truncate(struct inode *inode)
4135 credits = ext4_blocks_for_truncate(inode); 4275 credits = ext4_blocks_for_truncate(inode);
4136 4276
4137 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 4277 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
4138 if (IS_ERR(handle)) { 4278 if (IS_ERR(handle))
4139 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 4279 return PTR_ERR(handle);
4140 return;
4141 }
4142 4280
4143 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) 4281 if (inode->i_size & (inode->i_sb->s_blocksize - 1))
4144 ext4_block_truncate_page(handle, mapping, inode->i_size); 4282 ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4152,7 +4290,8 @@ void ext4_truncate(struct inode *inode)
4152 * Implication: the file must always be in a sane, consistent 4290 * Implication: the file must always be in a sane, consistent
4153 * truncatable state while each transaction commits. 4291 * truncatable state while each transaction commits.
4154 */ 4292 */
4155 if (ext4_orphan_add(handle, inode)) 4293 err = ext4_orphan_add(handle, inode);
4294 if (err)
4156 goto out_stop; 4295 goto out_stop;
4157 4296
4158 down_write(&EXT4_I(inode)->i_data_sem); 4297 down_write(&EXT4_I(inode)->i_data_sem);
@@ -4160,11 +4299,13 @@ void ext4_truncate(struct inode *inode)
4160 ext4_discard_preallocations(inode); 4299 ext4_discard_preallocations(inode);
4161 4300
4162 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 4301 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4163 ext4_ext_truncate(handle, inode); 4302 err = ext4_ext_truncate(handle, inode);
4164 else 4303 else
4165 ext4_ind_truncate(handle, inode); 4304 ext4_ind_truncate(handle, inode);
4166 4305
4167 up_write(&ei->i_data_sem); 4306 up_write(&ei->i_data_sem);
4307 if (err)
4308 goto out_stop;
4168 4309
4169 if (IS_SYNC(inode)) 4310 if (IS_SYNC(inode))
4170 ext4_handle_sync(handle); 4311 ext4_handle_sync(handle);
@@ -4180,11 +4321,12 @@ out_stop:
4180 if (inode->i_nlink) 4321 if (inode->i_nlink)
4181 ext4_orphan_del(handle, inode); 4322 ext4_orphan_del(handle, inode);
4182 4323
4183 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4324 inode->i_mtime = inode->i_ctime = current_time(inode);
4184 ext4_mark_inode_dirty(handle, inode); 4325 ext4_mark_inode_dirty(handle, inode);
4185 ext4_journal_stop(handle); 4326 ext4_journal_stop(handle);
4186 4327
4187 trace_ext4_truncate_exit(inode); 4328 trace_ext4_truncate_exit(inode);
4329 return err;
4188} 4330}
4189 4331
4190/* 4332/*
@@ -4352,7 +4494,9 @@ void ext4_set_inode_flags(struct inode *inode)
4352 new_fl |= S_NOATIME; 4494 new_fl |= S_NOATIME;
4353 if (flags & EXT4_DIRSYNC_FL) 4495 if (flags & EXT4_DIRSYNC_FL)
4354 new_fl |= S_DIRSYNC; 4496 new_fl |= S_DIRSYNC;
4355 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) 4497 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
4498 !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
4499 !ext4_encrypted_inode(inode))
4356 new_fl |= S_DAX; 4500 new_fl |= S_DAX;
4357 inode_set_flags(inode, new_fl, 4501 inode_set_flags(inode, new_fl,
4358 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); 4502 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -4411,7 +4555,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
4411{ 4555{
4412 __le32 *magic = (void *)raw_inode + 4556 __le32 *magic = (void *)raw_inode +
4413 EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; 4557 EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
4414 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { 4558 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
4559 EXT4_INODE_SIZE(inode->i_sb) &&
4560 *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
4415 ext4_set_inode_state(inode, EXT4_STATE_XATTR); 4561 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
4416 ext4_find_inline_data_nolock(inode); 4562 ext4_find_inline_data_nolock(inode);
4417 } else 4563 } else
@@ -4434,6 +4580,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4434 struct inode *inode; 4580 struct inode *inode;
4435 journal_t *journal = EXT4_SB(sb)->s_journal; 4581 journal_t *journal = EXT4_SB(sb)->s_journal;
4436 long ret; 4582 long ret;
4583 loff_t size;
4437 int block; 4584 int block;
4438 uid_t i_uid; 4585 uid_t i_uid;
4439 gid_t i_gid; 4586 gid_t i_gid;
@@ -4456,10 +4603,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4456 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4603 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4457 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4604 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4458 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4605 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4459 EXT4_INODE_SIZE(inode->i_sb)) { 4606 EXT4_INODE_SIZE(inode->i_sb) ||
4460 EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", 4607 (ei->i_extra_isize & 3)) {
4461 EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, 4608 EXT4_ERROR_INODE(inode,
4462 EXT4_INODE_SIZE(inode->i_sb)); 4609 "bad extra_isize %u (inode size %u)",
4610 ei->i_extra_isize,
4611 EXT4_INODE_SIZE(inode->i_sb));
4463 ret = -EFSCORRUPTED; 4612 ret = -EFSCORRUPTED;
4464 goto bad_inode; 4613 goto bad_inode;
4465 } 4614 }
@@ -4534,6 +4683,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4534 ei->i_file_acl |= 4683 ei->i_file_acl |=
4535 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4684 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4536 inode->i_size = ext4_isize(raw_inode); 4685 inode->i_size = ext4_isize(raw_inode);
4686 if ((size = i_size_read(inode)) < 0) {
4687 EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
4688 ret = -EFSCORRUPTED;
4689 goto bad_inode;
4690 }
4537 ei->i_disksize = inode->i_size; 4691 ei->i_disksize = inode->i_size;
4538#ifdef CONFIG_QUOTA 4692#ifdef CONFIG_QUOTA
4539 ei->i_reserved_quota = 0; 4693 ei->i_reserved_quota = 0;
@@ -4577,6 +4731,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4577 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4731 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4578 if (ei->i_extra_isize == 0) { 4732 if (ei->i_extra_isize == 0) {
4579 /* The extra space is currently unused. Use it. */ 4733 /* The extra space is currently unused. Use it. */
4734 BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
4580 ei->i_extra_isize = sizeof(struct ext4_inode) - 4735 ei->i_extra_isize = sizeof(struct ext4_inode) -
4581 EXT4_GOOD_OLD_INODE_SIZE; 4736 EXT4_GOOD_OLD_INODE_SIZE;
4582 } else { 4737 } else {
@@ -5024,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
5024 * do. We do the check mainly to optimize the common PAGE_SIZE == 5179 * do. We do the check mainly to optimize the common PAGE_SIZE ==
5025 * blocksize case 5180 * blocksize case
5026 */ 5181 */
5027 if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) 5182 if (offset > PAGE_SIZE - i_blocksize(inode))
5028 return; 5183 return;
5029 while (1) { 5184 while (1) {
5030 page = find_lock_page(inode->i_mapping, 5185 page = find_lock_page(inode->i_mapping,
@@ -5078,6 +5233,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5078 int orphan = 0; 5233 int orphan = 0;
5079 const unsigned int ia_valid = attr->ia_valid; 5234 const unsigned int ia_valid = attr->ia_valid;
5080 5235
5236 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
5237 return -EIO;
5238
5081 error = setattr_prepare(dentry, attr); 5239 error = setattr_prepare(dentry, attr);
5082 if (error) 5240 if (error)
5083 return error; 5241 return error;
@@ -5154,7 +5312,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5154 * update c/mtime in shrink case below 5312 * update c/mtime in shrink case below
5155 */ 5313 */
5156 if (!shrink) { 5314 if (!shrink) {
5157 inode->i_mtime = ext4_current_time(inode); 5315 inode->i_mtime = current_time(inode);
5158 inode->i_ctime = inode->i_mtime; 5316 inode->i_ctime = inode->i_mtime;
5159 } 5317 }
5160 down_write(&EXT4_I(inode)->i_data_sem); 5318 down_write(&EXT4_I(inode)->i_data_sem);
@@ -5199,12 +5357,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5199 * in data=journal mode to make pages freeable. 5357 * in data=journal mode to make pages freeable.
5200 */ 5358 */
5201 truncate_pagecache(inode, inode->i_size); 5359 truncate_pagecache(inode, inode->i_size);
5202 if (shrink) 5360 if (shrink) {
5203 ext4_truncate(inode); 5361 rc = ext4_truncate(inode);
5362 if (rc)
5363 error = rc;
5364 }
5204 up_write(&EXT4_I(inode)->i_mmap_sem); 5365 up_write(&EXT4_I(inode)->i_mmap_sem);
5205 } 5366 }
5206 5367
5207 if (!rc) { 5368 if (!error) {
5208 setattr_copy(inode, attr); 5369 setattr_copy(inode, attr);
5209 mark_inode_dirty(inode); 5370 mark_inode_dirty(inode);
5210 } 5371 }
@@ -5216,7 +5377,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5216 if (orphan && inode->i_nlink) 5377 if (orphan && inode->i_nlink)
5217 ext4_orphan_del(NULL, inode); 5378 ext4_orphan_del(NULL, inode);
5218 5379
5219 if (!rc && (ia_valid & ATTR_MODE)) 5380 if (!error && (ia_valid & ATTR_MODE))
5220 rc = posix_acl_chmod(inode, inode->i_mode); 5381 rc = posix_acl_chmod(inode, inode->i_mode);
5221 5382
5222err_out: 5383err_out:
@@ -5226,20 +5387,55 @@ err_out:
5226 return error; 5387 return error;
5227} 5388}
5228 5389
5229int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 5390int ext4_getattr(const struct path *path, struct kstat *stat,
5230 struct kstat *stat) 5391 u32 request_mask, unsigned int query_flags)
5231{ 5392{
5232 struct inode *inode; 5393 struct inode *inode = d_inode(path->dentry);
5233 unsigned long long delalloc_blocks; 5394 struct ext4_inode *raw_inode;
5395 struct ext4_inode_info *ei = EXT4_I(inode);
5396 unsigned int flags;
5397
5398 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
5399 stat->result_mask |= STATX_BTIME;
5400 stat->btime.tv_sec = ei->i_crtime.tv_sec;
5401 stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
5402 }
5403
5404 flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
5405 if (flags & EXT4_APPEND_FL)
5406 stat->attributes |= STATX_ATTR_APPEND;
5407 if (flags & EXT4_COMPR_FL)
5408 stat->attributes |= STATX_ATTR_COMPRESSED;
5409 if (flags & EXT4_ENCRYPT_FL)
5410 stat->attributes |= STATX_ATTR_ENCRYPTED;
5411 if (flags & EXT4_IMMUTABLE_FL)
5412 stat->attributes |= STATX_ATTR_IMMUTABLE;
5413 if (flags & EXT4_NODUMP_FL)
5414 stat->attributes |= STATX_ATTR_NODUMP;
5415
5416 stat->attributes_mask |= (STATX_ATTR_APPEND |
5417 STATX_ATTR_COMPRESSED |
5418 STATX_ATTR_ENCRYPTED |
5419 STATX_ATTR_IMMUTABLE |
5420 STATX_ATTR_NODUMP);
5234 5421
5235 inode = d_inode(dentry);
5236 generic_fillattr(inode, stat); 5422 generic_fillattr(inode, stat);
5423 return 0;
5424}
5425
5426int ext4_file_getattr(const struct path *path, struct kstat *stat,
5427 u32 request_mask, unsigned int query_flags)
5428{
5429 struct inode *inode = d_inode(path->dentry);
5430 u64 delalloc_blocks;
5431
5432 ext4_getattr(path, stat, request_mask, query_flags);
5237 5433
5238 /* 5434 /*
5239 * If there is inline data in the inode, the inode will normally not 5435 * If there is inline data in the inode, the inode will normally not
5240 * have data blocks allocated (it may have an external xattr block). 5436 * have data blocks allocated (it may have an external xattr block).
5241 * Report at least one sector for such files, so tools like tar, rsync, 5437 * Report at least one sector for such files, so tools like tar, rsync,
5242 * others doen't incorrectly think the file is completely sparse. 5438 * others don't incorrectly think the file is completely sparse.
5243 */ 5439 */
5244 if (unlikely(ext4_has_inline_data(inode))) 5440 if (unlikely(ext4_has_inline_data(inode)))
5245 stat->blocks += (stat->size + 511) >> 9; 5441 stat->blocks += (stat->size + 511) >> 9;
@@ -5361,6 +5557,9 @@ int ext4_mark_iloc_dirty(handle_t *handle,
5361{ 5557{
5362 int err = 0; 5558 int err = 0;
5363 5559
5560 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
5561 return -EIO;
5562
5364 if (IS_I_VERSION(inode)) 5563 if (IS_I_VERSION(inode))
5365 inode_inc_iversion(inode); 5564 inode_inc_iversion(inode);
5366 5565
@@ -5384,6 +5583,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
5384{ 5583{
5385 int err; 5584 int err;
5386 5585
5586 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
5587 return -EIO;
5588
5387 err = ext4_get_inode_loc(inode, iloc); 5589 err = ext4_get_inode_loc(inode, iloc);
5388 if (!err) { 5590 if (!err) {
5389 BUFFER_TRACE(iloc->bh, "get_write_access"); 5591 BUFFER_TRACE(iloc->bh, "get_write_access");
@@ -5455,18 +5657,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5455 err = ext4_reserve_inode_write(handle, inode, &iloc); 5657 err = ext4_reserve_inode_write(handle, inode, &iloc);
5456 if (err) 5658 if (err)
5457 return err; 5659 return err;
5458 if (ext4_handle_valid(handle) && 5660 if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
5459 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
5460 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { 5661 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
5461 /* 5662 /*
5462 * We need extra buffer credits since we may write into EA block 5663 * In nojournal mode, we can immediately attempt to expand
5664 * the inode. When journaled, we first need to obtain extra
5665 * buffer credits since we may write into the EA block
5463 * with this same handle. If journal_extend fails, then it will 5666 * with this same handle. If journal_extend fails, then it will
5464 * only result in a minor loss of functionality for that inode. 5667 * only result in a minor loss of functionality for that inode.
5465 * If this is felt to be critical, then e2fsck should be run to 5668 * If this is felt to be critical, then e2fsck should be run to
5466 * force a large enough s_min_extra_isize. 5669 * force a large enough s_min_extra_isize.
5467 */ 5670 */
5468 if ((jbd2_journal_extend(handle, 5671 if (!ext4_handle_valid(handle) ||
5469 EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { 5672 jbd2_journal_extend(handle,
5673 EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
5470 ret = ext4_expand_extra_isize(inode, 5674 ret = ext4_expand_extra_isize(inode,
5471 sbi->s_want_extra_isize, 5675 sbi->s_want_extra_isize,
5472 iloc, handle); 5676 iloc, handle);
@@ -5620,6 +5824,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
5620 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 5824 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
5621 } 5825 }
5622 ext4_set_aops(inode); 5826 ext4_set_aops(inode);
5827 /*
5828 * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
5829 * E.g. S_DAX may get cleared / set.
5830 */
5831 ext4_set_inode_flags(inode);
5623 5832
5624 jbd2_journal_unlock_updates(journal); 5833 jbd2_journal_unlock_updates(journal);
5625 percpu_up_write(&sbi->s_journal_flag_rwsem); 5834 percpu_up_write(&sbi->s_journal_flag_rwsem);
@@ -5647,8 +5856,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
5647 return !buffer_mapped(bh); 5856 return !buffer_mapped(bh);
5648} 5857}
5649 5858
5650int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 5859int ext4_page_mkwrite(struct vm_fault *vmf)
5651{ 5860{
5861 struct vm_area_struct *vma = vmf->vma;
5652 struct page *page = vmf->page; 5862 struct page *page = vmf->page;
5653 loff_t size; 5863 loff_t size;
5654 unsigned long len; 5864 unsigned long len;
@@ -5738,13 +5948,13 @@ out:
5738 return ret; 5948 return ret;
5739} 5949}
5740 5950
5741int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 5951int ext4_filemap_fault(struct vm_fault *vmf)
5742{ 5952{
5743 struct inode *inode = file_inode(vma->vm_file); 5953 struct inode *inode = file_inode(vmf->vma->vm_file);
5744 int err; 5954 int err;
5745 5955
5746 down_read(&EXT4_I(inode)->i_mmap_sem); 5956 down_read(&EXT4_I(inode)->i_mmap_sem);
5747 err = filemap_fault(vma, vmf); 5957 err = filemap_fault(vmf);
5748 up_read(&EXT4_I(inode)->i_mmap_sem); 5958 up_read(&EXT4_I(inode)->i_mmap_sem);
5749 5959
5750 return err; 5960 return err;