diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 508 |
1 files changed, 359 insertions, 149 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c064727ed62..b9ffa9f4191f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/printk.h> | 37 | #include <linux/printk.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/bitops.h> | 39 | #include <linux/bitops.h> |
40 | #include <linux/iomap.h> | ||
40 | 41 | ||
41 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
42 | #include "xattr.h" | 43 | #include "xattr.h" |
@@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, | |||
71 | csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, | 72 | csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, |
72 | csum_size); | 73 | csum_size); |
73 | offset += csum_size; | 74 | offset += csum_size; |
74 | csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, | ||
75 | EXT4_INODE_SIZE(inode->i_sb) - | ||
76 | offset); | ||
77 | } | 75 | } |
76 | csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, | ||
77 | EXT4_INODE_SIZE(inode->i_sb) - offset); | ||
78 | } | 78 | } |
79 | 79 | ||
80 | return csum; | 80 | return csum; |
@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode) | |||
261 | "couldn't mark inode dirty (err %d)", err); | 261 | "couldn't mark inode dirty (err %d)", err); |
262 | goto stop_handle; | 262 | goto stop_handle; |
263 | } | 263 | } |
264 | if (inode->i_blocks) | 264 | if (inode->i_blocks) { |
265 | ext4_truncate(inode); | 265 | err = ext4_truncate(inode); |
266 | if (err) { | ||
267 | ext4_error(inode->i_sb, | ||
268 | "couldn't truncate inode %lu (err %d)", | ||
269 | inode->i_ino, err); | ||
270 | goto stop_handle; | ||
271 | } | ||
272 | } | ||
266 | 273 | ||
267 | /* | 274 | /* |
268 | * ext4_ext_truncate() doesn't reserve any slop when it | 275 | * ext4_ext_truncate() doesn't reserve any slop when it |
@@ -654,12 +661,8 @@ found: | |||
654 | if (flags & EXT4_GET_BLOCKS_ZERO && | 661 | if (flags & EXT4_GET_BLOCKS_ZERO && |
655 | map->m_flags & EXT4_MAP_MAPPED && | 662 | map->m_flags & EXT4_MAP_MAPPED && |
656 | map->m_flags & EXT4_MAP_NEW) { | 663 | map->m_flags & EXT4_MAP_NEW) { |
657 | ext4_lblk_t i; | 664 | clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk, |
658 | 665 | map->m_len); | |
659 | for (i = 0; i < map->m_len; i++) { | ||
660 | unmap_underlying_metadata(inode->i_sb->s_bdev, | ||
661 | map->m_pblk + i); | ||
662 | } | ||
663 | ret = ext4_issue_zeroout(inode, map->m_lblk, | 666 | ret = ext4_issue_zeroout(inode, map->m_lblk, |
664 | map->m_pblk, map->m_len); | 667 | map->m_pblk, map->m_len); |
665 | if (ret) { | 668 | if (ret) { |
@@ -767,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
767 | ext4_update_bh_state(bh, map.m_flags); | 770 | ext4_update_bh_state(bh, map.m_flags); |
768 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 771 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
769 | ret = 0; | 772 | ret = 0; |
773 | } else if (ret == 0) { | ||
774 | /* hole case, need to fill in bh->b_size */ | ||
775 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | ||
770 | } | 776 | } |
771 | return ret; | 777 | return ret; |
772 | } | 778 | } |
@@ -1127,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, | |||
1127 | if (err) | 1133 | if (err) |
1128 | break; | 1134 | break; |
1129 | if (buffer_new(bh)) { | 1135 | if (buffer_new(bh)) { |
1130 | unmap_underlying_metadata(bh->b_bdev, | 1136 | clean_bdev_bh_alias(bh); |
1131 | bh->b_blocknr); | ||
1132 | if (PageUptodate(page)) { | 1137 | if (PageUptodate(page)) { |
1133 | clear_buffer_new(bh); | 1138 | clear_buffer_new(bh); |
1134 | set_buffer_uptodate(bh); | 1139 | set_buffer_uptodate(bh); |
@@ -1166,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, | |||
1166 | if (unlikely(err)) | 1171 | if (unlikely(err)) |
1167 | page_zero_new_buffers(page, from, to); | 1172 | page_zero_new_buffers(page, from, to); |
1168 | else if (decrypt) | 1173 | else if (decrypt) |
1169 | err = fscrypt_decrypt_page(page); | 1174 | err = fscrypt_decrypt_page(page->mapping->host, page, |
1175 | PAGE_SIZE, 0, page->index); | ||
1170 | return err; | 1176 | return err; |
1171 | } | 1177 | } |
1172 | #endif | 1178 | #endif |
@@ -1183,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1183 | pgoff_t index; | 1189 | pgoff_t index; |
1184 | unsigned from, to; | 1190 | unsigned from, to; |
1185 | 1191 | ||
1192 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | ||
1193 | return -EIO; | ||
1194 | |||
1186 | trace_ext4_write_begin(inode, pos, len, flags); | 1195 | trace_ext4_write_begin(inode, pos, len, flags); |
1187 | /* | 1196 | /* |
1188 | * Reserve one block more for addition to orphan list in case | 1197 | * Reserve one block more for addition to orphan list in case |
@@ -1324,8 +1333,11 @@ static int ext4_write_end(struct file *file, | |||
1324 | if (ext4_has_inline_data(inode)) { | 1333 | if (ext4_has_inline_data(inode)) { |
1325 | ret = ext4_write_inline_data_end(inode, pos, len, | 1334 | ret = ext4_write_inline_data_end(inode, pos, len, |
1326 | copied, page); | 1335 | copied, page); |
1327 | if (ret < 0) | 1336 | if (ret < 0) { |
1337 | unlock_page(page); | ||
1338 | put_page(page); | ||
1328 | goto errout; | 1339 | goto errout; |
1340 | } | ||
1329 | copied = ret; | 1341 | copied = ret; |
1330 | } else | 1342 | } else |
1331 | copied = block_write_end(file, mapping, pos, | 1343 | copied = block_write_end(file, mapping, pos, |
@@ -1379,7 +1391,9 @@ errout: | |||
1379 | * set the buffer to be dirty, since in data=journalled mode we need | 1391 | * set the buffer to be dirty, since in data=journalled mode we need |
1380 | * to call ext4_handle_dirty_metadata() instead. | 1392 | * to call ext4_handle_dirty_metadata() instead. |
1381 | */ | 1393 | */ |
1382 | static void zero_new_buffers(struct page *page, unsigned from, unsigned to) | 1394 | static void ext4_journalled_zero_new_buffers(handle_t *handle, |
1395 | struct page *page, | ||
1396 | unsigned from, unsigned to) | ||
1383 | { | 1397 | { |
1384 | unsigned int block_start = 0, block_end; | 1398 | unsigned int block_start = 0, block_end; |
1385 | struct buffer_head *head, *bh; | 1399 | struct buffer_head *head, *bh; |
@@ -1396,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to) | |||
1396 | size = min(to, block_end) - start; | 1410 | size = min(to, block_end) - start; |
1397 | 1411 | ||
1398 | zero_user(page, start, size); | 1412 | zero_user(page, start, size); |
1399 | set_buffer_uptodate(bh); | 1413 | write_end_fn(handle, bh); |
1400 | } | 1414 | } |
1401 | clear_buffer_new(bh); | 1415 | clear_buffer_new(bh); |
1402 | } | 1416 | } |
@@ -1425,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file, | |||
1425 | 1439 | ||
1426 | BUG_ON(!ext4_handle_valid(handle)); | 1440 | BUG_ON(!ext4_handle_valid(handle)); |
1427 | 1441 | ||
1428 | if (ext4_has_inline_data(inode)) | 1442 | if (ext4_has_inline_data(inode)) { |
1429 | copied = ext4_write_inline_data_end(inode, pos, len, | 1443 | ret = ext4_write_inline_data_end(inode, pos, len, |
1430 | copied, page); | 1444 | copied, page); |
1431 | else { | 1445 | if (ret < 0) { |
1432 | if (copied < len) { | 1446 | unlock_page(page); |
1433 | if (!PageUptodate(page)) | 1447 | put_page(page); |
1434 | copied = 0; | 1448 | goto errout; |
1435 | zero_new_buffers(page, from+copied, to); | ||
1436 | } | 1449 | } |
1437 | 1450 | copied = ret; | |
1451 | } else if (unlikely(copied < len) && !PageUptodate(page)) { | ||
1452 | copied = 0; | ||
1453 | ext4_journalled_zero_new_buffers(handle, page, from, to); | ||
1454 | } else { | ||
1455 | if (unlikely(copied < len)) | ||
1456 | ext4_journalled_zero_new_buffers(handle, page, | ||
1457 | from + copied, to); | ||
1438 | ret = ext4_walk_page_buffers(handle, page_buffers(page), from, | 1458 | ret = ext4_walk_page_buffers(handle, page_buffers(page), from, |
1439 | to, &partial, write_end_fn); | 1459 | from + copied, &partial, |
1460 | write_end_fn); | ||
1440 | if (!partial) | 1461 | if (!partial) |
1441 | SetPageUptodate(page); | 1462 | SetPageUptodate(page); |
1442 | } | 1463 | } |
@@ -1462,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1462 | */ | 1483 | */ |
1463 | ext4_orphan_add(handle, inode); | 1484 | ext4_orphan_add(handle, inode); |
1464 | 1485 | ||
1486 | errout: | ||
1465 | ret2 = ext4_journal_stop(handle); | 1487 | ret2 = ext4_journal_stop(handle); |
1466 | if (!ret) | 1488 | if (!ret) |
1467 | ret = ret2; | 1489 | ret = ret2; |
@@ -2028,6 +2050,12 @@ static int ext4_writepage(struct page *page, | |||
2028 | struct ext4_io_submit io_submit; | 2050 | struct ext4_io_submit io_submit; |
2029 | bool keep_towrite = false; | 2051 | bool keep_towrite = false; |
2030 | 2052 | ||
2053 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { | ||
2054 | ext4_invalidatepage(page, 0, PAGE_SIZE); | ||
2055 | unlock_page(page); | ||
2056 | return -EIO; | ||
2057 | } | ||
2058 | |||
2031 | trace_ext4_writepage(page); | 2059 | trace_ext4_writepage(page); |
2032 | size = i_size_read(inode); | 2060 | size = i_size_read(inode); |
2033 | if (page->index == size >> PAGE_SHIFT) | 2061 | if (page->index == size >> PAGE_SHIFT) |
@@ -2193,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, | |||
2193 | { | 2221 | { |
2194 | struct inode *inode = mpd->inode; | 2222 | struct inode *inode = mpd->inode; |
2195 | int err; | 2223 | int err; |
2196 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | 2224 | ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) |
2197 | >> inode->i_blkbits; | 2225 | >> inode->i_blkbits; |
2198 | 2226 | ||
2199 | do { | 2227 | do { |
@@ -2360,11 +2388,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | |||
2360 | 2388 | ||
2361 | BUG_ON(map->m_len == 0); | 2389 | BUG_ON(map->m_len == 0); |
2362 | if (map->m_flags & EXT4_MAP_NEW) { | 2390 | if (map->m_flags & EXT4_MAP_NEW) { |
2363 | struct block_device *bdev = inode->i_sb->s_bdev; | 2391 | clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk, |
2364 | int i; | 2392 | map->m_len); |
2365 | |||
2366 | for (i = 0; i < map->m_len; i++) | ||
2367 | unmap_underlying_metadata(bdev, map->m_pblk + i); | ||
2368 | } | 2393 | } |
2369 | return 0; | 2394 | return 0; |
2370 | } | 2395 | } |
@@ -2406,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2406 | if (err < 0) { | 2431 | if (err < 0) { |
2407 | struct super_block *sb = inode->i_sb; | 2432 | struct super_block *sb = inode->i_sb; |
2408 | 2433 | ||
2409 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 2434 | if (ext4_forced_shutdown(EXT4_SB(sb)) || |
2435 | EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
2410 | goto invalidate_dirty_pages; | 2436 | goto invalidate_dirty_pages; |
2411 | /* | 2437 | /* |
2412 | * Let the uper layers retry transient errors. | 2438 | * Let the uper layers retry transient errors. |
@@ -2461,8 +2487,8 @@ update_disksize: | |||
2461 | disksize = i_size; | 2487 | disksize = i_size; |
2462 | if (disksize > EXT4_I(inode)->i_disksize) | 2488 | if (disksize > EXT4_I(inode)->i_disksize) |
2463 | EXT4_I(inode)->i_disksize = disksize; | 2489 | EXT4_I(inode)->i_disksize = disksize; |
2464 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
2465 | up_write(&EXT4_I(inode)->i_data_sem); | 2490 | up_write(&EXT4_I(inode)->i_data_sem); |
2491 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
2466 | if (err2) | 2492 | if (err2) |
2467 | ext4_error(inode->i_sb, | 2493 | ext4_error(inode->i_sb, |
2468 | "Failed to mark inode %lu dirty", | 2494 | "Failed to mark inode %lu dirty", |
@@ -2628,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping, | |||
2628 | struct blk_plug plug; | 2654 | struct blk_plug plug; |
2629 | bool give_up_on_write = false; | 2655 | bool give_up_on_write = false; |
2630 | 2656 | ||
2657 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | ||
2658 | return -EIO; | ||
2659 | |||
2631 | percpu_down_read(&sbi->s_journal_flag_rwsem); | 2660 | percpu_down_read(&sbi->s_journal_flag_rwsem); |
2632 | trace_ext4_writepages(inode, wbc); | 2661 | trace_ext4_writepages(inode, wbc); |
2633 | 2662 | ||
@@ -2664,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping, | |||
2664 | * *never* be called, so if that ever happens, we would want | 2693 | * *never* be called, so if that ever happens, we would want |
2665 | * the stack trace. | 2694 | * the stack trace. |
2666 | */ | 2695 | */ |
2667 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { | 2696 | if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || |
2697 | sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
2668 | ret = -EROFS; | 2698 | ret = -EROFS; |
2669 | goto out_writepages; | 2699 | goto out_writepages; |
2670 | } | 2700 | } |
@@ -2889,9 +2919,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2889 | struct inode *inode = mapping->host; | 2919 | struct inode *inode = mapping->host; |
2890 | handle_t *handle; | 2920 | handle_t *handle; |
2891 | 2921 | ||
2922 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | ||
2923 | return -EIO; | ||
2924 | |||
2892 | index = pos >> PAGE_SHIFT; | 2925 | index = pos >> PAGE_SHIFT; |
2893 | 2926 | ||
2894 | if (ext4_nonda_switch(inode->i_sb)) { | 2927 | if (ext4_nonda_switch(inode->i_sb) || |
2928 | S_ISLNK(inode->i_mode)) { | ||
2895 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 2929 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
2896 | return ext4_write_begin(file, mapping, pos, | 2930 | return ext4_write_begin(file, mapping, pos, |
2897 | len, flags, pagep, fsdata); | 2931 | len, flags, pagep, fsdata); |
@@ -3268,53 +3302,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3268 | } | 3302 | } |
3269 | 3303 | ||
3270 | #ifdef CONFIG_FS_DAX | 3304 | #ifdef CONFIG_FS_DAX |
3271 | /* | 3305 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, |
3272 | * Get block function for DAX IO and mmap faults. It takes care of converting | 3306 | unsigned flags, struct iomap *iomap) |
3273 | * unwritten extents to written ones and initializes new / converted blocks | ||
3274 | * to zeros. | ||
3275 | */ | ||
3276 | int ext4_dax_get_block(struct inode *inode, sector_t iblock, | ||
3277 | struct buffer_head *bh_result, int create) | ||
3278 | { | 3307 | { |
3308 | unsigned int blkbits = inode->i_blkbits; | ||
3309 | unsigned long first_block = offset >> blkbits; | ||
3310 | unsigned long last_block = (offset + length - 1) >> blkbits; | ||
3311 | struct ext4_map_blocks map; | ||
3279 | int ret; | 3312 | int ret; |
3280 | 3313 | ||
3281 | ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create); | 3314 | if (WARN_ON_ONCE(ext4_has_inline_data(inode))) |
3282 | if (!create) | 3315 | return -ERANGE; |
3283 | return _ext4_get_block(inode, iblock, bh_result, 0); | ||
3284 | 3316 | ||
3285 | ret = ext4_get_block_trans(inode, iblock, bh_result, | 3317 | map.m_lblk = first_block; |
3286 | EXT4_GET_BLOCKS_PRE_IO | | 3318 | map.m_len = last_block - first_block + 1; |
3287 | EXT4_GET_BLOCKS_CREATE_ZERO); | 3319 | |
3288 | if (ret < 0) | 3320 | if (!(flags & IOMAP_WRITE)) { |
3289 | return ret; | 3321 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
3322 | } else { | ||
3323 | int dio_credits; | ||
3324 | handle_t *handle; | ||
3325 | int retries = 0; | ||
3290 | 3326 | ||
3291 | if (buffer_unwritten(bh_result)) { | 3327 | /* Trim mapping request to maximum we can map at once for DIO */ |
3328 | if (map.m_len > DIO_MAX_BLOCKS) | ||
3329 | map.m_len = DIO_MAX_BLOCKS; | ||
3330 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); | ||
3331 | retry: | ||
3292 | /* | 3332 | /* |
3293 | * We are protected by i_mmap_sem or i_mutex so we know block | 3333 | * Either we allocate blocks and then we don't get unwritten |
3294 | * cannot go away from under us even though we dropped | 3334 | * extent so we have reserved enough credits, or the blocks |
3295 | * i_data_sem. Convert extent to written and write zeros there. | 3335 | * are already allocated and unwritten and in that case |
3336 | * extent conversion fits in the credits as well. | ||
3296 | */ | 3337 | */ |
3297 | ret = ext4_get_block_trans(inode, iblock, bh_result, | 3338 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
3298 | EXT4_GET_BLOCKS_CONVERT | | 3339 | dio_credits); |
3299 | EXT4_GET_BLOCKS_CREATE_ZERO); | 3340 | if (IS_ERR(handle)) |
3300 | if (ret < 0) | 3341 | return PTR_ERR(handle); |
3342 | |||
3343 | ret = ext4_map_blocks(handle, inode, &map, | ||
3344 | EXT4_GET_BLOCKS_CREATE_ZERO); | ||
3345 | if (ret < 0) { | ||
3346 | ext4_journal_stop(handle); | ||
3347 | if (ret == -ENOSPC && | ||
3348 | ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
3349 | goto retry; | ||
3301 | return ret; | 3350 | return ret; |
3351 | } | ||
3352 | |||
3353 | /* | ||
3354 | * If we added blocks beyond i_size, we need to make sure they | ||
3355 | * will get truncated if we crash before updating i_size in | ||
3356 | * ext4_iomap_end(). For faults we don't need to do that (and | ||
3357 | * even cannot because for orphan list operations inode_lock is | ||
3358 | * required) - if we happen to instantiate block beyond i_size, | ||
3359 | * it is because we race with truncate which has already added | ||
3360 | * the inode to the orphan list. | ||
3361 | */ | ||
3362 | if (!(flags & IOMAP_FAULT) && first_block + map.m_len > | ||
3363 | (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) { | ||
3364 | int err; | ||
3365 | |||
3366 | err = ext4_orphan_add(handle, inode); | ||
3367 | if (err < 0) { | ||
3368 | ext4_journal_stop(handle); | ||
3369 | return err; | ||
3370 | } | ||
3371 | } | ||
3372 | ext4_journal_stop(handle); | ||
3302 | } | 3373 | } |
3303 | /* | 3374 | |
3304 | * At least for now we have to clear BH_New so that DAX code | 3375 | iomap->flags = 0; |
3305 | * doesn't attempt to zero blocks again in a racy way. | 3376 | iomap->bdev = inode->i_sb->s_bdev; |
3306 | */ | 3377 | iomap->offset = first_block << blkbits; |
3307 | clear_buffer_new(bh_result); | 3378 | |
3379 | if (ret == 0) { | ||
3380 | iomap->type = IOMAP_HOLE; | ||
3381 | iomap->blkno = IOMAP_NULL_BLOCK; | ||
3382 | iomap->length = (u64)map.m_len << blkbits; | ||
3383 | } else { | ||
3384 | if (map.m_flags & EXT4_MAP_MAPPED) { | ||
3385 | iomap->type = IOMAP_MAPPED; | ||
3386 | } else if (map.m_flags & EXT4_MAP_UNWRITTEN) { | ||
3387 | iomap->type = IOMAP_UNWRITTEN; | ||
3388 | } else { | ||
3389 | WARN_ON_ONCE(1); | ||
3390 | return -EIO; | ||
3391 | } | ||
3392 | iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9); | ||
3393 | iomap->length = (u64)map.m_len << blkbits; | ||
3394 | } | ||
3395 | |||
3396 | if (map.m_flags & EXT4_MAP_NEW) | ||
3397 | iomap->flags |= IOMAP_F_NEW; | ||
3308 | return 0; | 3398 | return 0; |
3309 | } | 3399 | } |
3310 | #else | 3400 | |
3311 | /* Just define empty function, it will never get called. */ | 3401 | static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, |
3312 | int ext4_dax_get_block(struct inode *inode, sector_t iblock, | 3402 | ssize_t written, unsigned flags, struct iomap *iomap) |
3313 | struct buffer_head *bh_result, int create) | ||
3314 | { | 3403 | { |
3315 | BUG(); | 3404 | int ret = 0; |
3316 | return 0; | 3405 | handle_t *handle; |
3406 | int blkbits = inode->i_blkbits; | ||
3407 | bool truncate = false; | ||
3408 | |||
3409 | if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) | ||
3410 | return 0; | ||
3411 | |||
3412 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
3413 | if (IS_ERR(handle)) { | ||
3414 | ret = PTR_ERR(handle); | ||
3415 | goto orphan_del; | ||
3416 | } | ||
3417 | if (ext4_update_inode_size(inode, offset + written)) | ||
3418 | ext4_mark_inode_dirty(handle, inode); | ||
3419 | /* | ||
3420 | * We may need to truncate allocated but not written blocks beyond EOF. | ||
3421 | */ | ||
3422 | if (iomap->offset + iomap->length > | ||
3423 | ALIGN(inode->i_size, 1 << blkbits)) { | ||
3424 | ext4_lblk_t written_blk, end_blk; | ||
3425 | |||
3426 | written_blk = (offset + written) >> blkbits; | ||
3427 | end_blk = (offset + length) >> blkbits; | ||
3428 | if (written_blk < end_blk && ext4_can_truncate(inode)) | ||
3429 | truncate = true; | ||
3430 | } | ||
3431 | /* | ||
3432 | * Remove inode from orphan list if we were extending a inode and | ||
3433 | * everything went fine. | ||
3434 | */ | ||
3435 | if (!truncate && inode->i_nlink && | ||
3436 | !list_empty(&EXT4_I(inode)->i_orphan)) | ||
3437 | ext4_orphan_del(handle, inode); | ||
3438 | ext4_journal_stop(handle); | ||
3439 | if (truncate) { | ||
3440 | ext4_truncate_failed_write(inode); | ||
3441 | orphan_del: | ||
3442 | /* | ||
3443 | * If truncate failed early the inode might still be on the | ||
3444 | * orphan list; we need to make sure the inode is removed from | ||
3445 | * the orphan list in that case. | ||
3446 | */ | ||
3447 | if (inode->i_nlink) | ||
3448 | ext4_orphan_del(NULL, inode); | ||
3449 | } | ||
3450 | return ret; | ||
3317 | } | 3451 | } |
3452 | |||
3453 | const struct iomap_ops ext4_iomap_ops = { | ||
3454 | .iomap_begin = ext4_iomap_begin, | ||
3455 | .iomap_end = ext4_iomap_end, | ||
3456 | }; | ||
3457 | |||
3318 | #endif | 3458 | #endif |
3319 | 3459 | ||
3320 | static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3460 | static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
@@ -3436,20 +3576,8 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) | |||
3436 | iocb->private = NULL; | 3576 | iocb->private = NULL; |
3437 | if (overwrite) | 3577 | if (overwrite) |
3438 | get_block_func = ext4_dio_get_block_overwrite; | 3578 | get_block_func = ext4_dio_get_block_overwrite; |
3439 | else if (IS_DAX(inode)) { | 3579 | else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || |
3440 | /* | 3580 | round_down(offset, i_blocksize(inode)) >= inode->i_size) { |
3441 | * We can avoid zeroing for aligned DAX writes beyond EOF. Other | ||
3442 | * writes need zeroing either because they can race with page | ||
3443 | * faults or because they use partial blocks. | ||
3444 | */ | ||
3445 | if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size && | ||
3446 | ext4_aligned_io(inode, offset, count)) | ||
3447 | get_block_func = ext4_dio_get_block; | ||
3448 | else | ||
3449 | get_block_func = ext4_dax_get_block; | ||
3450 | dio_flags = DIO_LOCKING; | ||
3451 | } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || | ||
3452 | round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) { | ||
3453 | get_block_func = ext4_dio_get_block; | 3581 | get_block_func = ext4_dio_get_block; |
3454 | dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; | 3582 | dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; |
3455 | } else if (is_sync_kiocb(iocb)) { | 3583 | } else if (is_sync_kiocb(iocb)) { |
@@ -3462,14 +3590,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) | |||
3462 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | 3590 | #ifdef CONFIG_EXT4_FS_ENCRYPTION |
3463 | BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); | 3591 | BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); |
3464 | #endif | 3592 | #endif |
3465 | if (IS_DAX(inode)) { | 3593 | ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, |
3466 | ret = dax_do_io(iocb, inode, iter, get_block_func, | 3594 | get_block_func, ext4_end_io_dio, NULL, |
3467 | ext4_end_io_dio, dio_flags); | 3595 | dio_flags); |
3468 | } else | ||
3469 | ret = __blockdev_direct_IO(iocb, inode, | ||
3470 | inode->i_sb->s_bdev, iter, | ||
3471 | get_block_func, | ||
3472 | ext4_end_io_dio, NULL, dio_flags); | ||
3473 | 3596 | ||
3474 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3597 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, |
3475 | EXT4_STATE_DIO_UNWRITTEN)) { | 3598 | EXT4_STATE_DIO_UNWRITTEN)) { |
@@ -3538,6 +3661,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) | |||
3538 | { | 3661 | { |
3539 | struct address_space *mapping = iocb->ki_filp->f_mapping; | 3662 | struct address_space *mapping = iocb->ki_filp->f_mapping; |
3540 | struct inode *inode = mapping->host; | 3663 | struct inode *inode = mapping->host; |
3664 | size_t count = iov_iter_count(iter); | ||
3541 | ssize_t ret; | 3665 | ssize_t ret; |
3542 | 3666 | ||
3543 | /* | 3667 | /* |
@@ -3546,19 +3670,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) | |||
3546 | * we are protected against page writeback as well. | 3670 | * we are protected against page writeback as well. |
3547 | */ | 3671 | */ |
3548 | inode_lock_shared(inode); | 3672 | inode_lock_shared(inode); |
3549 | if (IS_DAX(inode)) { | 3673 | ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, |
3550 | ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0); | 3674 | iocb->ki_pos + count); |
3551 | } else { | 3675 | if (ret) |
3552 | size_t count = iov_iter_count(iter); | 3676 | goto out_unlock; |
3553 | 3677 | ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, | |
3554 | ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, | 3678 | iter, ext4_dio_get_block, NULL, NULL, 0); |
3555 | iocb->ki_pos + count); | ||
3556 | if (ret) | ||
3557 | goto out_unlock; | ||
3558 | ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, | ||
3559 | iter, ext4_dio_get_block, | ||
3560 | NULL, NULL, 0); | ||
3561 | } | ||
3562 | out_unlock: | 3679 | out_unlock: |
3563 | inode_unlock_shared(inode); | 3680 | inode_unlock_shared(inode); |
3564 | return ret; | 3681 | return ret; |
@@ -3587,6 +3704,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3587 | if (ext4_has_inline_data(inode)) | 3704 | if (ext4_has_inline_data(inode)) |
3588 | return 0; | 3705 | return 0; |
3589 | 3706 | ||
3707 | /* DAX uses iomap path now */ | ||
3708 | if (WARN_ON_ONCE(IS_DAX(inode))) | ||
3709 | return 0; | ||
3710 | |||
3590 | trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); | 3711 | trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); |
3591 | if (iov_iter_rw(iter) == READ) | 3712 | if (iov_iter_rw(iter) == READ) |
3592 | ret = ext4_direct_IO_read(iocb, iter); | 3713 | ret = ext4_direct_IO_read(iocb, iter); |
@@ -3615,6 +3736,13 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
3615 | return __set_page_dirty_nobuffers(page); | 3736 | return __set_page_dirty_nobuffers(page); |
3616 | } | 3737 | } |
3617 | 3738 | ||
3739 | static int ext4_set_page_dirty(struct page *page) | ||
3740 | { | ||
3741 | WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page)); | ||
3742 | WARN_ON_ONCE(!page_has_buffers(page)); | ||
3743 | return __set_page_dirty_buffers(page); | ||
3744 | } | ||
3745 | |||
3618 | static const struct address_space_operations ext4_aops = { | 3746 | static const struct address_space_operations ext4_aops = { |
3619 | .readpage = ext4_readpage, | 3747 | .readpage = ext4_readpage, |
3620 | .readpages = ext4_readpages, | 3748 | .readpages = ext4_readpages, |
@@ -3622,6 +3750,7 @@ static const struct address_space_operations ext4_aops = { | |||
3622 | .writepages = ext4_writepages, | 3750 | .writepages = ext4_writepages, |
3623 | .write_begin = ext4_write_begin, | 3751 | .write_begin = ext4_write_begin, |
3624 | .write_end = ext4_write_end, | 3752 | .write_end = ext4_write_end, |
3753 | .set_page_dirty = ext4_set_page_dirty, | ||
3625 | .bmap = ext4_bmap, | 3754 | .bmap = ext4_bmap, |
3626 | .invalidatepage = ext4_invalidatepage, | 3755 | .invalidatepage = ext4_invalidatepage, |
3627 | .releasepage = ext4_releasepage, | 3756 | .releasepage = ext4_releasepage, |
@@ -3654,6 +3783,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
3654 | .writepages = ext4_writepages, | 3783 | .writepages = ext4_writepages, |
3655 | .write_begin = ext4_da_write_begin, | 3784 | .write_begin = ext4_da_write_begin, |
3656 | .write_end = ext4_da_write_end, | 3785 | .write_end = ext4_da_write_end, |
3786 | .set_page_dirty = ext4_set_page_dirty, | ||
3657 | .bmap = ext4_bmap, | 3787 | .bmap = ext4_bmap, |
3658 | .invalidatepage = ext4_da_invalidatepage, | 3788 | .invalidatepage = ext4_da_invalidatepage, |
3659 | .releasepage = ext4_releasepage, | 3789 | .releasepage = ext4_releasepage, |
@@ -3743,7 +3873,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, | |||
3743 | /* We expect the key to be set. */ | 3873 | /* We expect the key to be set. */ |
3744 | BUG_ON(!fscrypt_has_encryption_key(inode)); | 3874 | BUG_ON(!fscrypt_has_encryption_key(inode)); |
3745 | BUG_ON(blocksize != PAGE_SIZE); | 3875 | BUG_ON(blocksize != PAGE_SIZE); |
3746 | WARN_ON_ONCE(fscrypt_decrypt_page(page)); | 3876 | WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, |
3877 | page, PAGE_SIZE, 0, page->index)); | ||
3747 | } | 3878 | } |
3748 | } | 3879 | } |
3749 | if (ext4_should_journal_data(inode)) { | 3880 | if (ext4_should_journal_data(inode)) { |
@@ -3792,8 +3923,10 @@ static int ext4_block_zero_page_range(handle_t *handle, | |||
3792 | if (length > max || length < 0) | 3923 | if (length > max || length < 0) |
3793 | length = max; | 3924 | length = max; |
3794 | 3925 | ||
3795 | if (IS_DAX(inode)) | 3926 | if (IS_DAX(inode)) { |
3796 | return dax_zero_page_range(inode, from, length, ext4_get_block); | 3927 | return iomap_zero_range(inode, from, length, NULL, |
3928 | &ext4_iomap_ops); | ||
3929 | } | ||
3797 | return __ext4_block_zero_page_range(handle, mapping, from, length); | 3930 | return __ext4_block_zero_page_range(handle, mapping, from, length); |
3798 | } | 3931 | } |
3799 | 3932 | ||
@@ -3811,6 +3944,10 @@ static int ext4_block_truncate_page(handle_t *handle, | |||
3811 | unsigned blocksize; | 3944 | unsigned blocksize; |
3812 | struct inode *inode = mapping->host; | 3945 | struct inode *inode = mapping->host; |
3813 | 3946 | ||
3947 | /* If we are processing an encrypted inode during orphan list handling */ | ||
3948 | if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode)) | ||
3949 | return 0; | ||
3950 | |||
3814 | blocksize = inode->i_sb->s_blocksize; | 3951 | blocksize = inode->i_sb->s_blocksize; |
3815 | length = blocksize - (offset & (blocksize - 1)); | 3952 | length = blocksize - (offset & (blocksize - 1)); |
3816 | 3953 | ||
@@ -4026,7 +4163,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
4026 | if (IS_SYNC(inode)) | 4163 | if (IS_SYNC(inode)) |
4027 | ext4_handle_sync(handle); | 4164 | ext4_handle_sync(handle); |
4028 | 4165 | ||
4029 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4166 | inode->i_mtime = inode->i_ctime = current_time(inode); |
4030 | ext4_mark_inode_dirty(handle, inode); | 4167 | ext4_mark_inode_dirty(handle, inode); |
4031 | out_stop: | 4168 | out_stop: |
4032 | ext4_journal_stop(handle); | 4169 | ext4_journal_stop(handle); |
@@ -4091,10 +4228,11 @@ int ext4_inode_attach_jinode(struct inode *inode) | |||
4091 | * that's fine - as long as they are linked from the inode, the post-crash | 4228 | * that's fine - as long as they are linked from the inode, the post-crash |
4092 | * ext4_truncate() run will find them and release them. | 4229 | * ext4_truncate() run will find them and release them. |
4093 | */ | 4230 | */ |
4094 | void ext4_truncate(struct inode *inode) | 4231 | int ext4_truncate(struct inode *inode) |
4095 | { | 4232 | { |
4096 | struct ext4_inode_info *ei = EXT4_I(inode); | 4233 | struct ext4_inode_info *ei = EXT4_I(inode); |
4097 | unsigned int credits; | 4234 | unsigned int credits; |
4235 | int err = 0; | ||
4098 | handle_t *handle; | 4236 | handle_t *handle; |
4099 | struct address_space *mapping = inode->i_mapping; | 4237 | struct address_space *mapping = inode->i_mapping; |
4100 | 4238 | ||
@@ -4108,7 +4246,7 @@ void ext4_truncate(struct inode *inode) | |||
4108 | trace_ext4_truncate_enter(inode); | 4246 | trace_ext4_truncate_enter(inode); |
4109 | 4247 | ||
4110 | if (!ext4_can_truncate(inode)) | 4248 | if (!ext4_can_truncate(inode)) |
4111 | return; | 4249 | return 0; |
4112 | 4250 | ||
4113 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 4251 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4114 | 4252 | ||
@@ -4118,15 +4256,17 @@ void ext4_truncate(struct inode *inode) | |||
4118 | if (ext4_has_inline_data(inode)) { | 4256 | if (ext4_has_inline_data(inode)) { |
4119 | int has_inline = 1; | 4257 | int has_inline = 1; |
4120 | 4258 | ||
4121 | ext4_inline_data_truncate(inode, &has_inline); | 4259 | err = ext4_inline_data_truncate(inode, &has_inline); |
4260 | if (err) | ||
4261 | return err; | ||
4122 | if (has_inline) | 4262 | if (has_inline) |
4123 | return; | 4263 | return 0; |
4124 | } | 4264 | } |
4125 | 4265 | ||
4126 | /* If we zero-out tail of the page, we have to create jinode for jbd2 */ | 4266 | /* If we zero-out tail of the page, we have to create jinode for jbd2 */ |
4127 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { | 4267 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { |
4128 | if (ext4_inode_attach_jinode(inode) < 0) | 4268 | if (ext4_inode_attach_jinode(inode) < 0) |
4129 | return; | 4269 | return 0; |
4130 | } | 4270 | } |
4131 | 4271 | ||
4132 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 4272 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
@@ -4135,10 +4275,8 @@ void ext4_truncate(struct inode *inode) | |||
4135 | credits = ext4_blocks_for_truncate(inode); | 4275 | credits = ext4_blocks_for_truncate(inode); |
4136 | 4276 | ||
4137 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 4277 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
4138 | if (IS_ERR(handle)) { | 4278 | if (IS_ERR(handle)) |
4139 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); | 4279 | return PTR_ERR(handle); |
4140 | return; | ||
4141 | } | ||
4142 | 4280 | ||
4143 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) | 4281 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) |
4144 | ext4_block_truncate_page(handle, mapping, inode->i_size); | 4282 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
@@ -4152,7 +4290,8 @@ void ext4_truncate(struct inode *inode) | |||
4152 | * Implication: the file must always be in a sane, consistent | 4290 | * Implication: the file must always be in a sane, consistent |
4153 | * truncatable state while each transaction commits. | 4291 | * truncatable state while each transaction commits. |
4154 | */ | 4292 | */ |
4155 | if (ext4_orphan_add(handle, inode)) | 4293 | err = ext4_orphan_add(handle, inode); |
4294 | if (err) | ||
4156 | goto out_stop; | 4295 | goto out_stop; |
4157 | 4296 | ||
4158 | down_write(&EXT4_I(inode)->i_data_sem); | 4297 | down_write(&EXT4_I(inode)->i_data_sem); |
@@ -4160,11 +4299,13 @@ void ext4_truncate(struct inode *inode) | |||
4160 | ext4_discard_preallocations(inode); | 4299 | ext4_discard_preallocations(inode); |
4161 | 4300 | ||
4162 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 4301 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
4163 | ext4_ext_truncate(handle, inode); | 4302 | err = ext4_ext_truncate(handle, inode); |
4164 | else | 4303 | else |
4165 | ext4_ind_truncate(handle, inode); | 4304 | ext4_ind_truncate(handle, inode); |
4166 | 4305 | ||
4167 | up_write(&ei->i_data_sem); | 4306 | up_write(&ei->i_data_sem); |
4307 | if (err) | ||
4308 | goto out_stop; | ||
4168 | 4309 | ||
4169 | if (IS_SYNC(inode)) | 4310 | if (IS_SYNC(inode)) |
4170 | ext4_handle_sync(handle); | 4311 | ext4_handle_sync(handle); |
@@ -4180,11 +4321,12 @@ out_stop: | |||
4180 | if (inode->i_nlink) | 4321 | if (inode->i_nlink) |
4181 | ext4_orphan_del(handle, inode); | 4322 | ext4_orphan_del(handle, inode); |
4182 | 4323 | ||
4183 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4324 | inode->i_mtime = inode->i_ctime = current_time(inode); |
4184 | ext4_mark_inode_dirty(handle, inode); | 4325 | ext4_mark_inode_dirty(handle, inode); |
4185 | ext4_journal_stop(handle); | 4326 | ext4_journal_stop(handle); |
4186 | 4327 | ||
4187 | trace_ext4_truncate_exit(inode); | 4328 | trace_ext4_truncate_exit(inode); |
4329 | return err; | ||
4188 | } | 4330 | } |
4189 | 4331 | ||
4190 | /* | 4332 | /* |
@@ -4352,7 +4494,9 @@ void ext4_set_inode_flags(struct inode *inode) | |||
4352 | new_fl |= S_NOATIME; | 4494 | new_fl |= S_NOATIME; |
4353 | if (flags & EXT4_DIRSYNC_FL) | 4495 | if (flags & EXT4_DIRSYNC_FL) |
4354 | new_fl |= S_DIRSYNC; | 4496 | new_fl |= S_DIRSYNC; |
4355 | if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) | 4497 | if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) && |
4498 | !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) && | ||
4499 | !ext4_encrypted_inode(inode)) | ||
4356 | new_fl |= S_DAX; | 4500 | new_fl |= S_DAX; |
4357 | inode_set_flags(inode, new_fl, | 4501 | inode_set_flags(inode, new_fl, |
4358 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); | 4502 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); |
@@ -4411,7 +4555,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode, | |||
4411 | { | 4555 | { |
4412 | __le32 *magic = (void *)raw_inode + | 4556 | __le32 *magic = (void *)raw_inode + |
4413 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; | 4557 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; |
4414 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { | 4558 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <= |
4559 | EXT4_INODE_SIZE(inode->i_sb) && | ||
4560 | *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { | ||
4415 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | 4561 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
4416 | ext4_find_inline_data_nolock(inode); | 4562 | ext4_find_inline_data_nolock(inode); |
4417 | } else | 4563 | } else |
@@ -4434,6 +4580,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4434 | struct inode *inode; | 4580 | struct inode *inode; |
4435 | journal_t *journal = EXT4_SB(sb)->s_journal; | 4581 | journal_t *journal = EXT4_SB(sb)->s_journal; |
4436 | long ret; | 4582 | long ret; |
4583 | loff_t size; | ||
4437 | int block; | 4584 | int block; |
4438 | uid_t i_uid; | 4585 | uid_t i_uid; |
4439 | gid_t i_gid; | 4586 | gid_t i_gid; |
@@ -4456,10 +4603,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4456 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 4603 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
4457 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 4604 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
4458 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 4605 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
4459 | EXT4_INODE_SIZE(inode->i_sb)) { | 4606 | EXT4_INODE_SIZE(inode->i_sb) || |
4460 | EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", | 4607 | (ei->i_extra_isize & 3)) { |
4461 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, | 4608 | EXT4_ERROR_INODE(inode, |
4462 | EXT4_INODE_SIZE(inode->i_sb)); | 4609 | "bad extra_isize %u (inode size %u)", |
4610 | ei->i_extra_isize, | ||
4611 | EXT4_INODE_SIZE(inode->i_sb)); | ||
4463 | ret = -EFSCORRUPTED; | 4612 | ret = -EFSCORRUPTED; |
4464 | goto bad_inode; | 4613 | goto bad_inode; |
4465 | } | 4614 | } |
@@ -4534,6 +4683,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4534 | ei->i_file_acl |= | 4683 | ei->i_file_acl |= |
4535 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; | 4684 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; |
4536 | inode->i_size = ext4_isize(raw_inode); | 4685 | inode->i_size = ext4_isize(raw_inode); |
4686 | if ((size = i_size_read(inode)) < 0) { | ||
4687 | EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); | ||
4688 | ret = -EFSCORRUPTED; | ||
4689 | goto bad_inode; | ||
4690 | } | ||
4537 | ei->i_disksize = inode->i_size; | 4691 | ei->i_disksize = inode->i_size; |
4538 | #ifdef CONFIG_QUOTA | 4692 | #ifdef CONFIG_QUOTA |
4539 | ei->i_reserved_quota = 0; | 4693 | ei->i_reserved_quota = 0; |
@@ -4577,6 +4731,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4577 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 4731 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
4578 | if (ei->i_extra_isize == 0) { | 4732 | if (ei->i_extra_isize == 0) { |
4579 | /* The extra space is currently unused. Use it. */ | 4733 | /* The extra space is currently unused. Use it. */ |
4734 | BUILD_BUG_ON(sizeof(struct ext4_inode) & 3); | ||
4580 | ei->i_extra_isize = sizeof(struct ext4_inode) - | 4735 | ei->i_extra_isize = sizeof(struct ext4_inode) - |
4581 | EXT4_GOOD_OLD_INODE_SIZE; | 4736 | EXT4_GOOD_OLD_INODE_SIZE; |
4582 | } else { | 4737 | } else { |
@@ -5024,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) | |||
5024 | * do. We do the check mainly to optimize the common PAGE_SIZE == | 5179 | * do. We do the check mainly to optimize the common PAGE_SIZE == |
5025 | * blocksize case | 5180 | * blocksize case |
5026 | */ | 5181 | */ |
5027 | if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) | 5182 | if (offset > PAGE_SIZE - i_blocksize(inode)) |
5028 | return; | 5183 | return; |
5029 | while (1) { | 5184 | while (1) { |
5030 | page = find_lock_page(inode->i_mapping, | 5185 | page = find_lock_page(inode->i_mapping, |
@@ -5078,6 +5233,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5078 | int orphan = 0; | 5233 | int orphan = 0; |
5079 | const unsigned int ia_valid = attr->ia_valid; | 5234 | const unsigned int ia_valid = attr->ia_valid; |
5080 | 5235 | ||
5236 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | ||
5237 | return -EIO; | ||
5238 | |||
5081 | error = setattr_prepare(dentry, attr); | 5239 | error = setattr_prepare(dentry, attr); |
5082 | if (error) | 5240 | if (error) |
5083 | return error; | 5241 | return error; |
@@ -5154,7 +5312,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5154 | * update c/mtime in shrink case below | 5312 | * update c/mtime in shrink case below |
5155 | */ | 5313 | */ |
5156 | if (!shrink) { | 5314 | if (!shrink) { |
5157 | inode->i_mtime = ext4_current_time(inode); | 5315 | inode->i_mtime = current_time(inode); |
5158 | inode->i_ctime = inode->i_mtime; | 5316 | inode->i_ctime = inode->i_mtime; |
5159 | } | 5317 | } |
5160 | down_write(&EXT4_I(inode)->i_data_sem); | 5318 | down_write(&EXT4_I(inode)->i_data_sem); |
@@ -5199,12 +5357,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5199 | * in data=journal mode to make pages freeable. | 5357 | * in data=journal mode to make pages freeable. |
5200 | */ | 5358 | */ |
5201 | truncate_pagecache(inode, inode->i_size); | 5359 | truncate_pagecache(inode, inode->i_size); |
5202 | if (shrink) | 5360 | if (shrink) { |
5203 | ext4_truncate(inode); | 5361 | rc = ext4_truncate(inode); |
5362 | if (rc) | ||
5363 | error = rc; | ||
5364 | } | ||
5204 | up_write(&EXT4_I(inode)->i_mmap_sem); | 5365 | up_write(&EXT4_I(inode)->i_mmap_sem); |
5205 | } | 5366 | } |
5206 | 5367 | ||
5207 | if (!rc) { | 5368 | if (!error) { |
5208 | setattr_copy(inode, attr); | 5369 | setattr_copy(inode, attr); |
5209 | mark_inode_dirty(inode); | 5370 | mark_inode_dirty(inode); |
5210 | } | 5371 | } |
@@ -5216,7 +5377,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5216 | if (orphan && inode->i_nlink) | 5377 | if (orphan && inode->i_nlink) |
5217 | ext4_orphan_del(NULL, inode); | 5378 | ext4_orphan_del(NULL, inode); |
5218 | 5379 | ||
5219 | if (!rc && (ia_valid & ATTR_MODE)) | 5380 | if (!error && (ia_valid & ATTR_MODE)) |
5220 | rc = posix_acl_chmod(inode, inode->i_mode); | 5381 | rc = posix_acl_chmod(inode, inode->i_mode); |
5221 | 5382 | ||
5222 | err_out: | 5383 | err_out: |
@@ -5226,20 +5387,55 @@ err_out: | |||
5226 | return error; | 5387 | return error; |
5227 | } | 5388 | } |
5228 | 5389 | ||
5229 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 5390 | int ext4_getattr(const struct path *path, struct kstat *stat, |
5230 | struct kstat *stat) | 5391 | u32 request_mask, unsigned int query_flags) |
5231 | { | 5392 | { |
5232 | struct inode *inode; | 5393 | struct inode *inode = d_inode(path->dentry); |
5233 | unsigned long long delalloc_blocks; | 5394 | struct ext4_inode *raw_inode; |
5395 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
5396 | unsigned int flags; | ||
5397 | |||
5398 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) { | ||
5399 | stat->result_mask |= STATX_BTIME; | ||
5400 | stat->btime.tv_sec = ei->i_crtime.tv_sec; | ||
5401 | stat->btime.tv_nsec = ei->i_crtime.tv_nsec; | ||
5402 | } | ||
5403 | |||
5404 | flags = ei->i_flags & EXT4_FL_USER_VISIBLE; | ||
5405 | if (flags & EXT4_APPEND_FL) | ||
5406 | stat->attributes |= STATX_ATTR_APPEND; | ||
5407 | if (flags & EXT4_COMPR_FL) | ||
5408 | stat->attributes |= STATX_ATTR_COMPRESSED; | ||
5409 | if (flags & EXT4_ENCRYPT_FL) | ||
5410 | stat->attributes |= STATX_ATTR_ENCRYPTED; | ||
5411 | if (flags & EXT4_IMMUTABLE_FL) | ||
5412 | stat->attributes |= STATX_ATTR_IMMUTABLE; | ||
5413 | if (flags & EXT4_NODUMP_FL) | ||
5414 | stat->attributes |= STATX_ATTR_NODUMP; | ||
5415 | |||
5416 | stat->attributes_mask |= (STATX_ATTR_APPEND | | ||
5417 | STATX_ATTR_COMPRESSED | | ||
5418 | STATX_ATTR_ENCRYPTED | | ||
5419 | STATX_ATTR_IMMUTABLE | | ||
5420 | STATX_ATTR_NODUMP); | ||
5234 | 5421 | ||
5235 | inode = d_inode(dentry); | ||
5236 | generic_fillattr(inode, stat); | 5422 | generic_fillattr(inode, stat); |
5423 | return 0; | ||
5424 | } | ||
5425 | |||
5426 | int ext4_file_getattr(const struct path *path, struct kstat *stat, | ||
5427 | u32 request_mask, unsigned int query_flags) | ||
5428 | { | ||
5429 | struct inode *inode = d_inode(path->dentry); | ||
5430 | u64 delalloc_blocks; | ||
5431 | |||
5432 | ext4_getattr(path, stat, request_mask, query_flags); | ||
5237 | 5433 | ||
5238 | /* | 5434 | /* |
5239 | * If there is inline data in the inode, the inode will normally not | 5435 | * If there is inline data in the inode, the inode will normally not |
5240 | * have data blocks allocated (it may have an external xattr block). | 5436 | * have data blocks allocated (it may have an external xattr block). |
5241 | * Report at least one sector for such files, so tools like tar, rsync, | 5437 | * Report at least one sector for such files, so tools like tar, rsync, |
5242 | * others doen't incorrectly think the file is completely sparse. | 5438 | * others don't incorrectly think the file is completely sparse. |
5243 | */ | 5439 | */ |
5244 | if (unlikely(ext4_has_inline_data(inode))) | 5440 | if (unlikely(ext4_has_inline_data(inode))) |
5245 | stat->blocks += (stat->size + 511) >> 9; | 5441 | stat->blocks += (stat->size + 511) >> 9; |
@@ -5361,6 +5557,9 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
5361 | { | 5557 | { |
5362 | int err = 0; | 5558 | int err = 0; |
5363 | 5559 | ||
5560 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | ||
5561 | return -EIO; | ||
5562 | |||
5364 | if (IS_I_VERSION(inode)) | 5563 | if (IS_I_VERSION(inode)) |
5365 | inode_inc_iversion(inode); | 5564 | inode_inc_iversion(inode); |
5366 | 5565 | ||
@@ -5384,6 +5583,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, | |||
5384 | { | 5583 | { |
5385 | int err; | 5584 | int err; |
5386 | 5585 | ||
5586 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | ||
5587 | return -EIO; | ||
5588 | |||
5387 | err = ext4_get_inode_loc(inode, iloc); | 5589 | err = ext4_get_inode_loc(inode, iloc); |
5388 | if (!err) { | 5590 | if (!err) { |
5389 | BUFFER_TRACE(iloc->bh, "get_write_access"); | 5591 | BUFFER_TRACE(iloc->bh, "get_write_access"); |
@@ -5455,18 +5657,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5455 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5657 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5456 | if (err) | 5658 | if (err) |
5457 | return err; | 5659 | return err; |
5458 | if (ext4_handle_valid(handle) && | 5660 | if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
5459 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | ||
5460 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { | 5661 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { |
5461 | /* | 5662 | /* |
5462 | * We need extra buffer credits since we may write into EA block | 5663 | * In nojournal mode, we can immediately attempt to expand |
5664 | * the inode. When journaled, we first need to obtain extra | ||
5665 | * buffer credits since we may write into the EA block | ||
5463 | * with this same handle. If journal_extend fails, then it will | 5666 | * with this same handle. If journal_extend fails, then it will |
5464 | * only result in a minor loss of functionality for that inode. | 5667 | * only result in a minor loss of functionality for that inode. |
5465 | * If this is felt to be critical, then e2fsck should be run to | 5668 | * If this is felt to be critical, then e2fsck should be run to |
5466 | * force a large enough s_min_extra_isize. | 5669 | * force a large enough s_min_extra_isize. |
5467 | */ | 5670 | */ |
5468 | if ((jbd2_journal_extend(handle, | 5671 | if (!ext4_handle_valid(handle) || |
5469 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { | 5672 | jbd2_journal_extend(handle, |
5673 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) { | ||
5470 | ret = ext4_expand_extra_isize(inode, | 5674 | ret = ext4_expand_extra_isize(inode, |
5471 | sbi->s_want_extra_isize, | 5675 | sbi->s_want_extra_isize, |
5472 | iloc, handle); | 5676 | iloc, handle); |
@@ -5620,6 +5824,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5620 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 5824 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5621 | } | 5825 | } |
5622 | ext4_set_aops(inode); | 5826 | ext4_set_aops(inode); |
5827 | /* | ||
5828 | * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated. | ||
5829 | * E.g. S_DAX may get cleared / set. | ||
5830 | */ | ||
5831 | ext4_set_inode_flags(inode); | ||
5623 | 5832 | ||
5624 | jbd2_journal_unlock_updates(journal); | 5833 | jbd2_journal_unlock_updates(journal); |
5625 | percpu_up_write(&sbi->s_journal_flag_rwsem); | 5834 | percpu_up_write(&sbi->s_journal_flag_rwsem); |
@@ -5647,8 +5856,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | |||
5647 | return !buffer_mapped(bh); | 5856 | return !buffer_mapped(bh); |
5648 | } | 5857 | } |
5649 | 5858 | ||
5650 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 5859 | int ext4_page_mkwrite(struct vm_fault *vmf) |
5651 | { | 5860 | { |
5861 | struct vm_area_struct *vma = vmf->vma; | ||
5652 | struct page *page = vmf->page; | 5862 | struct page *page = vmf->page; |
5653 | loff_t size; | 5863 | loff_t size; |
5654 | unsigned long len; | 5864 | unsigned long len; |
@@ -5738,13 +5948,13 @@ out: | |||
5738 | return ret; | 5948 | return ret; |
5739 | } | 5949 | } |
5740 | 5950 | ||
5741 | int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 5951 | int ext4_filemap_fault(struct vm_fault *vmf) |
5742 | { | 5952 | { |
5743 | struct inode *inode = file_inode(vma->vm_file); | 5953 | struct inode *inode = file_inode(vmf->vma->vm_file); |
5744 | int err; | 5954 | int err; |
5745 | 5955 | ||
5746 | down_read(&EXT4_I(inode)->i_mmap_sem); | 5956 | down_read(&EXT4_I(inode)->i_mmap_sem); |
5747 | err = filemap_fault(vma, vmf); | 5957 | err = filemap_fault(vmf); |
5748 | up_read(&EXT4_I(inode)->i_mmap_sem); | 5958 | up_read(&EXT4_I(inode)->i_mmap_sem); |
5749 | 5959 | ||
5750 | return err; | 5960 | return err; |