aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-16 20:33:01 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-16 20:33:01 -0500
commit36cd5c19c3fe8291fac45a262c44c00bd14b531a (patch)
tree77813e551c82546c9f6cddc8a3216ba5d02807ed /fs/ext4
parent2a74dbb9a86e8102dcd07d284135b4530a84826e (diff)
parentbd9926e80330d43f15b710c2935fa41b792d56fd (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 update from Ted Ts'o: "There are two major features for this merge window. The first is inline data, which allows small files or directories to be stored in the in-inode extended attribute area. (This requires that the file system use inodes which are at least 256 bytes or larger; 128 byte inodes do not have any room for in-inode xattrs.) The second new feature is SEEK_HOLE/SEEK_DATA support. This is enabled by the extent status tree patches, and this infrastructure will be used to further optimize ext4 in the future. Beyond that, we have the usual collection of code cleanups and bug fixes." * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (63 commits) ext4: zero out inline data using memset() instead of empty_zero_page ext4: ensure Inode flags consistency are checked at build time ext4: Remove CONFIG_EXT4_FS_XATTR ext4: remove unused variable from ext4_ext_in_cache() ext4: remove redundant initialization in ext4_fill_super() ext4: remove redundant code in ext4_alloc_inode() ext4: use sync_inode_metadata() when syncing inode metadata ext4: enable ext4 inline support ext4: let fallocate handle inline data correctly ext4: let ext4_truncate handle inline data correctly ext4: evict inline data out if we need to strore xattr in inode ext4: let fiemap work with inline data ext4: let ext4_rename handle inline dir ext4: let empty_dir handle inline dir ext4: let ext4_delete_entry() handle inline data ext4: make ext4_delete_entry generic ext4: let ext4_find_entry handle inline data ext4: create a new function search_dir ext4: let ext4_readdir handle inline data ext4: let add_dir_entry handle inline data properly ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig15
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/acl.c6
-rw-r--r--fs/ext4/dir.c41
-rw-r--r--fs/ext4/ext4.h165
-rw-r--r--fs/ext4/ext4_extents.h40
-rw-r--r--fs/ext4/ext4_jbd2.h7
-rw-r--r--fs/ext4/extents.c480
-rw-r--r--fs/ext4/extents_status.c500
-rw-r--r--fs/ext4/extents_status.h45
-rw-r--r--fs/ext4/file.c336
-rw-r--r--fs/ext4/fsync.c6
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/indirect.c5
-rw-r--r--fs/ext4/inline.c1884
-rw-r--r--fs/ext4/inode.c629
-rw-r--r--fs/ext4/mballoc.c60
-rw-r--r--fs/ext4/migrate.c1
-rw-r--r--fs/ext4/move_extent.c1
-rw-r--r--fs/ext4/namei.c531
-rw-r--r--fs/ext4/page-io.c3
-rw-r--r--fs/ext4/resize.c17
-rw-r--r--fs/ext4/super.c57
-rw-r--r--fs/ext4/symlink.c4
-rw-r--r--fs/ext4/xattr.c110
-rw-r--r--fs/ext4/xattr.h158
26 files changed, 4045 insertions, 1066 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index c22f17021b6e..0a475c881852 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -39,22 +39,8 @@ config EXT4_USE_FOR_EXT23
39 compiled kernel size by using one file system driver for 39 compiled kernel size by using one file system driver for
40 ext2, ext3, and ext4 file systems. 40 ext2, ext3, and ext4 file systems.
41 41
42config EXT4_FS_XATTR
43 bool "Ext4 extended attributes"
44 depends on EXT4_FS
45 default y
46 help
47 Extended attributes are name:value pairs associated with inodes by
48 the kernel or by users (see the attr(5) manual page, or visit
49 <http://acl.bestbits.at/> for details).
50
51 If unsure, say N.
52
53 You need this for POSIX ACL support on ext4.
54
55config EXT4_FS_POSIX_ACL 42config EXT4_FS_POSIX_ACL
56 bool "Ext4 POSIX Access Control Lists" 43 bool "Ext4 POSIX Access Control Lists"
57 depends on EXT4_FS_XATTR
58 select FS_POSIX_ACL 44 select FS_POSIX_ACL
59 help 45 help
60 POSIX Access Control Lists (ACLs) support permissions for users and 46 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -67,7 +53,6 @@ config EXT4_FS_POSIX_ACL
67 53
68config EXT4_FS_SECURITY 54config EXT4_FS_SECURITY
69 bool "Ext4 Security Labels" 55 bool "Ext4 Security Labels"
70 depends on EXT4_FS_XATTR
71 help 56 help
72 Security labels support alternative access control models 57 Security labels support alternative access control models
73 implemented by security modules like SELinux. This option 58 implemented by security modules like SELinux. This option
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 56fd8f865930..0310fec2ee3d 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -7,8 +7,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
10 mmp.o indirect.o 10 mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
11 xattr_trusted.o inline.o
11 12
12ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
14ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o 14ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index d3c5b88fd89f..e6e0d988439b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -423,8 +423,10 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
423 423
424retry: 424retry:
425 handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); 425 handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
426 if (IS_ERR(handle)) 426 if (IS_ERR(handle)) {
427 return PTR_ERR(handle); 427 error = PTR_ERR(handle);
428 goto release_and_out;
429 }
428 error = ext4_set_acl(handle, inode, type, acl); 430 error = ext4_set_acl(handle, inode, type, acl);
429 ext4_journal_stop(handle); 431 ext4_journal_stop(handle);
430 if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 432 if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 8e07d2a5a139..b8d877f6c1fa 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -27,23 +27,11 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h" 29#include "ext4.h"
30 30#include "xattr.h"
31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
33};
34 31
35static int ext4_dx_readdir(struct file *filp, 32static int ext4_dx_readdir(struct file *filp,
36 void *dirent, filldir_t filldir); 33 void *dirent, filldir_t filldir);
37 34
38static unsigned char get_dtype(struct super_block *sb, int filetype)
39{
40 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
41 (filetype >= EXT4_FT_MAX))
42 return DT_UNKNOWN;
43
44 return (ext4_filetype_table[filetype]);
45}
46
47/** 35/**
48 * Check if the given dir-inode refers to an htree-indexed directory 36 * Check if the given dir-inode refers to an htree-indexed directory
49 * (or a directory which chould potentially get coverted to use htree 37 * (or a directory which chould potentially get coverted to use htree
@@ -68,11 +56,14 @@ static int is_dx_dir(struct inode *inode)
68 * Return 0 if the directory entry is OK, and 1 if there is a problem 56 * Return 0 if the directory entry is OK, and 1 if there is a problem
69 * 57 *
70 * Note: this is the opposite of what ext2 and ext3 historically returned... 58 * Note: this is the opposite of what ext2 and ext3 historically returned...
59 *
60 * bh passed here can be an inode block or a dir data block, depending
61 * on the inode inline data flag.
71 */ 62 */
72int __ext4_check_dir_entry(const char *function, unsigned int line, 63int __ext4_check_dir_entry(const char *function, unsigned int line,
73 struct inode *dir, struct file *filp, 64 struct inode *dir, struct file *filp,
74 struct ext4_dir_entry_2 *de, 65 struct ext4_dir_entry_2 *de,
75 struct buffer_head *bh, 66 struct buffer_head *bh, char *buf, int size,
76 unsigned int offset) 67 unsigned int offset)
77{ 68{
78 const char *error_msg = NULL; 69 const char *error_msg = NULL;
@@ -85,9 +76,8 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
85 error_msg = "rec_len % 4 != 0"; 76 error_msg = "rec_len % 4 != 0";
86 else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len))) 77 else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
87 error_msg = "rec_len is too small for name_len"; 78 error_msg = "rec_len is too small for name_len";
88 else if (unlikely(((char *) de - bh->b_data) + rlen > 79 else if (unlikely(((char *) de - buf) + rlen > size))
89 dir->i_sb->s_blocksize)) 80 error_msg = "directory entry across range";
90 error_msg = "directory entry across blocks";
91 else if (unlikely(le32_to_cpu(de->inode) > 81 else if (unlikely(le32_to_cpu(de->inode) >
92 le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))) 82 le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
93 error_msg = "inode out of bounds"; 83 error_msg = "inode out of bounds";
@@ -98,14 +88,14 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
98 ext4_error_file(filp, function, line, bh->b_blocknr, 88 ext4_error_file(filp, function, line, bh->b_blocknr,
99 "bad entry in directory: %s - offset=%u(%u), " 89 "bad entry in directory: %s - offset=%u(%u), "
100 "inode=%u, rec_len=%d, name_len=%d", 90 "inode=%u, rec_len=%d, name_len=%d",
101 error_msg, (unsigned) (offset % bh->b_size), 91 error_msg, (unsigned) (offset % size),
102 offset, le32_to_cpu(de->inode), 92 offset, le32_to_cpu(de->inode),
103 rlen, de->name_len); 93 rlen, de->name_len);
104 else 94 else
105 ext4_error_inode(dir, function, line, bh->b_blocknr, 95 ext4_error_inode(dir, function, line, bh->b_blocknr,
106 "bad entry in directory: %s - offset=%u(%u), " 96 "bad entry in directory: %s - offset=%u(%u), "
107 "inode=%u, rec_len=%d, name_len=%d", 97 "inode=%u, rec_len=%d, name_len=%d",
108 error_msg, (unsigned) (offset % bh->b_size), 98 error_msg, (unsigned) (offset % size),
109 offset, le32_to_cpu(de->inode), 99 offset, le32_to_cpu(de->inode),
110 rlen, de->name_len); 100 rlen, de->name_len);
111 101
@@ -125,6 +115,14 @@ static int ext4_readdir(struct file *filp,
125 int ret = 0; 115 int ret = 0;
126 int dir_has_error = 0; 116 int dir_has_error = 0;
127 117
118 if (ext4_has_inline_data(inode)) {
119 int has_inline_data = 1;
120 ret = ext4_read_inline_dir(filp, dirent, filldir,
121 &has_inline_data);
122 if (has_inline_data)
123 return ret;
124 }
125
128 if (is_dx_dir(inode)) { 126 if (is_dx_dir(inode)) {
129 err = ext4_dx_readdir(filp, dirent, filldir); 127 err = ext4_dx_readdir(filp, dirent, filldir);
130 if (err != ERR_BAD_DX_DIR) { 128 if (err != ERR_BAD_DX_DIR) {
@@ -221,8 +219,9 @@ revalidate:
221 while (!error && filp->f_pos < inode->i_size 219 while (!error && filp->f_pos < inode->i_size
222 && offset < sb->s_blocksize) { 220 && offset < sb->s_blocksize) {
223 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); 221 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
224 if (ext4_check_dir_entry(inode, filp, de, 222 if (ext4_check_dir_entry(inode, filp, de, bh,
225 bh, offset)) { 223 bh->b_data, bh->b_size,
224 offset)) {
226 /* 225 /*
227 * On error, skip the f_pos to the next block 226 * On error, skip the f_pos to the next block
228 */ 227 */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index df163da388c9..8462eb3c33aa 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -57,6 +57,16 @@
57#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 57#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
58#endif 58#endif
59 59
60/*
61 * Turn on EXT_DEBUG to get lots of info about extents operations.
62 */
63#define EXT_DEBUG__
64#ifdef EXT_DEBUG
65#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
66#else
67#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
68#endif
69
60#define EXT4_ERROR_INODE(inode, fmt, a...) \ 70#define EXT4_ERROR_INODE(inode, fmt, a...) \
61 ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) 71 ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
62 72
@@ -392,6 +402,7 @@ struct flex_groups {
392#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ 402#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
393#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ 403#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
394#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ 404#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
405#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
395#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 406#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
396 407
397#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ 408#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
@@ -448,28 +459,26 @@ enum {
448 EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ 459 EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
449 EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ 460 EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
450 EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ 461 EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
462 EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
451 EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ 463 EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
452}; 464};
453 465
454#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) 466/*
455#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \ 467 * Since it's pretty easy to mix up bit numbers and hex values, we use a
456 printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \ 468 * build-time check to make sure that EXT4_XXX_FL is consistent with respect to
457 EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); } 469 * EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost
458 470 * any extra space in the compiled kernel image, otherwise, the build will fail.
459/* 471 * It's important that these values are the same, since we are using
460 * Since it's pretty easy to mix up bit numbers and hex values, and we 472 * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent
461 * can't do a compile-time test for ENUM values, we use a run-time 473 * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk
462 * test to make sure that EXT4_XXX_FL is consistent with respect to 474 * values found in ext2, ext3 and ext4 filesystems, and of course the values
463 * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop 475 * defined in e2fsprogs.
464 * out so it won't cost any extra space in the compiled kernel image.
465 * But it's important that these values are the same, since we are
466 * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL
467 * must be consistent with the values of FS_XXX_FL defined in
468 * include/linux/fs.h and the on-disk values found in ext2, ext3, and
469 * ext4 filesystems, and of course the values defined in e2fsprogs.
470 * 476 *
471 * It's not paranoia if the Murphy's Law really *is* out to get you. :-) 477 * It's not paranoia if the Murphy's Law really *is* out to get you. :-)
472 */ 478 */
479#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
480#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
481
473static inline void ext4_check_flag_values(void) 482static inline void ext4_check_flag_values(void)
474{ 483{
475 CHECK_FLAG_VALUE(SECRM); 484 CHECK_FLAG_VALUE(SECRM);
@@ -494,6 +503,7 @@ static inline void ext4_check_flag_values(void)
494 CHECK_FLAG_VALUE(EXTENTS); 503 CHECK_FLAG_VALUE(EXTENTS);
495 CHECK_FLAG_VALUE(EA_INODE); 504 CHECK_FLAG_VALUE(EA_INODE);
496 CHECK_FLAG_VALUE(EOFBLOCKS); 505 CHECK_FLAG_VALUE(EOFBLOCKS);
506 CHECK_FLAG_VALUE(INLINE_DATA);
497 CHECK_FLAG_VALUE(RESERVED); 507 CHECK_FLAG_VALUE(RESERVED);
498} 508}
499 509
@@ -811,6 +821,8 @@ struct ext4_ext_cache {
811 __u32 ec_len; /* must be 32bit to return holes */ 821 __u32 ec_len; /* must be 32bit to return holes */
812}; 822};
813 823
824#include "extents_status.h"
825
814/* 826/*
815 * fourth extended file system inode data in memory 827 * fourth extended file system inode data in memory
816 */ 828 */
@@ -833,7 +845,6 @@ struct ext4_inode_info {
833#endif 845#endif
834 unsigned long i_flags; 846 unsigned long i_flags;
835 847
836#ifdef CONFIG_EXT4_FS_XATTR
837 /* 848 /*
838 * Extended attributes can be read independently of the main file 849 * Extended attributes can be read independently of the main file
839 * data. Taking i_mutex even when reading would cause contention 850 * data. Taking i_mutex even when reading would cause contention
@@ -842,7 +853,6 @@ struct ext4_inode_info {
842 * EAs. 853 * EAs.
843 */ 854 */
844 struct rw_semaphore xattr_sem; 855 struct rw_semaphore xattr_sem;
845#endif
846 856
847 struct list_head i_orphan; /* unlinked but open inodes */ 857 struct list_head i_orphan; /* unlinked but open inodes */
848 858
@@ -888,6 +898,10 @@ struct ext4_inode_info {
888 struct list_head i_prealloc_list; 898 struct list_head i_prealloc_list;
889 spinlock_t i_prealloc_lock; 899 spinlock_t i_prealloc_lock;
890 900
901 /* extents status tree */
902 struct ext4_es_tree i_es_tree;
903 rwlock_t i_es_lock;
904
891 /* ialloc */ 905 /* ialloc */
892 ext4_group_t i_last_alloc_group; 906 ext4_group_t i_last_alloc_group;
893 907
@@ -902,6 +916,10 @@ struct ext4_inode_info {
902 /* on-disk additional length */ 916 /* on-disk additional length */
903 __u16 i_extra_isize; 917 __u16 i_extra_isize;
904 918
919 /* Indicate the inline data space. */
920 u16 i_inline_off;
921 u16 i_inline_size;
922
905#ifdef CONFIG_QUOTA 923#ifdef CONFIG_QUOTA
906 /* quota space reservation, managed internally by quota code */ 924 /* quota space reservation, managed internally by quota code */
907 qsize_t i_reserved_quota; 925 qsize_t i_reserved_quota;
@@ -1360,6 +1378,7 @@ enum {
1360 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ 1378 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
1361 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read 1379 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
1362 nolocking */ 1380 nolocking */
1381 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
1363}; 1382};
1364 1383
1365#define EXT4_INODE_BIT_FNS(name, field, offset) \ 1384#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1481,7 +1500,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1481#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1500#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1482#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ 1501#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
1483#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ 1502#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
1484#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */ 1503#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
1485 1504
1486#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1505#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1487#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1506#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1505,7 +1524,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1505 EXT4_FEATURE_INCOMPAT_EXTENTS| \ 1524 EXT4_FEATURE_INCOMPAT_EXTENTS| \
1506 EXT4_FEATURE_INCOMPAT_64BIT| \ 1525 EXT4_FEATURE_INCOMPAT_64BIT| \
1507 EXT4_FEATURE_INCOMPAT_FLEX_BG| \ 1526 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
1508 EXT4_FEATURE_INCOMPAT_MMP) 1527 EXT4_FEATURE_INCOMPAT_MMP | \
1528 EXT4_FEATURE_INCOMPAT_INLINE_DATA)
1509#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1529#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
1510 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1530 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
1511 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ 1531 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -1592,6 +1612,11 @@ struct ext4_dir_entry_tail {
1592 __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ 1612 __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */
1593}; 1613};
1594 1614
1615#define EXT4_DIRENT_TAIL(block, blocksize) \
1616 ((struct ext4_dir_entry_tail *)(((void *)(block)) + \
1617 ((blocksize) - \
1618 sizeof(struct ext4_dir_entry_tail))))
1619
1595/* 1620/*
1596 * Ext4 directory file types. Only the low 3 bits are used. The 1621 * Ext4 directory file types. Only the low 3 bits are used. The
1597 * other bits are reserved for now. 1622 * other bits are reserved for now.
@@ -1936,14 +1961,42 @@ ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
1936extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, 1961extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
1937 struct file *, 1962 struct file *,
1938 struct ext4_dir_entry_2 *, 1963 struct ext4_dir_entry_2 *,
1939 struct buffer_head *, unsigned int); 1964 struct buffer_head *, char *, int,
1940#define ext4_check_dir_entry(dir, filp, de, bh, offset) \ 1965 unsigned int);
1966#define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \
1941 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ 1967 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
1942 (de), (bh), (offset))) 1968 (de), (bh), (buf), (size), (offset)))
1943extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 1969extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1944 __u32 minor_hash, 1970 __u32 minor_hash,
1945 struct ext4_dir_entry_2 *dirent); 1971 struct ext4_dir_entry_2 *dirent);
1946extern void ext4_htree_free_dir_info(struct dir_private_info *p); 1972extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1973extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1974 struct buffer_head *bh,
1975 void *buf, int buf_size,
1976 const char *name, int namelen,
1977 struct ext4_dir_entry_2 **dest_de);
1978void ext4_insert_dentry(struct inode *inode,
1979 struct ext4_dir_entry_2 *de,
1980 int buf_size,
1981 const char *name, int namelen);
1982static inline void ext4_update_dx_flag(struct inode *inode)
1983{
1984 if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
1985 EXT4_FEATURE_COMPAT_DIR_INDEX))
1986 ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
1987}
1988static unsigned char ext4_filetype_table[] = {
1989 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1990};
1991
1992static inline unsigned char get_dtype(struct super_block *sb, int filetype)
1993{
1994 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
1995 (filetype >= EXT4_FT_MAX))
1996 return DT_UNKNOWN;
1997
1998 return ext4_filetype_table[filetype];
1999}
1947 2000
1948/* fsync.c */ 2001/* fsync.c */
1949extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 2002extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -1994,8 +2047,23 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1994 ext4_lblk_t, int, int *); 2047 ext4_lblk_t, int, int *);
1995struct buffer_head *ext4_bread(handle_t *, struct inode *, 2048struct buffer_head *ext4_bread(handle_t *, struct inode *,
1996 ext4_lblk_t, int, int *); 2049 ext4_lblk_t, int, int *);
2050int ext4_get_block_write(struct inode *inode, sector_t iblock,
2051 struct buffer_head *bh_result, int create);
1997int ext4_get_block(struct inode *inode, sector_t iblock, 2052int ext4_get_block(struct inode *inode, sector_t iblock,
1998 struct buffer_head *bh_result, int create); 2053 struct buffer_head *bh_result, int create);
2054int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2055 struct buffer_head *bh, int create);
2056int ext4_walk_page_buffers(handle_t *handle,
2057 struct buffer_head *head,
2058 unsigned from,
2059 unsigned to,
2060 int *partial,
2061 int (*fn)(handle_t *handle,
2062 struct buffer_head *bh));
2063int do_journal_get_write_access(handle_t *handle,
2064 struct buffer_head *bh);
2065#define FALL_BACK_TO_NONDELALLOC 1
2066#define CONVERT_INLINE_DATA 2
1999 2067
2000extern struct inode *ext4_iget(struct super_block *, unsigned long); 2068extern struct inode *ext4_iget(struct super_block *, unsigned long);
2001extern int ext4_write_inode(struct inode *, struct writeback_control *); 2069extern int ext4_write_inode(struct inode *, struct writeback_control *);
@@ -2050,6 +2118,20 @@ extern int ext4_orphan_add(handle_t *, struct inode *);
2050extern int ext4_orphan_del(handle_t *, struct inode *); 2118extern int ext4_orphan_del(handle_t *, struct inode *);
2051extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, 2119extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
2052 __u32 start_minor_hash, __u32 *next_hash); 2120 __u32 start_minor_hash, __u32 *next_hash);
2121extern int search_dir(struct buffer_head *bh,
2122 char *search_buf,
2123 int buf_size,
2124 struct inode *dir,
2125 const struct qstr *d_name,
2126 unsigned int offset,
2127 struct ext4_dir_entry_2 **res_dir);
2128extern int ext4_generic_delete_entry(handle_t *handle,
2129 struct inode *dir,
2130 struct ext4_dir_entry_2 *de_del,
2131 struct buffer_head *bh,
2132 void *entry_buf,
2133 int buf_size,
2134 int csum_size);
2053 2135
2054/* resize.c */ 2136/* resize.c */
2055extern int ext4_group_add(struct super_block *sb, 2137extern int ext4_group_add(struct super_block *sb,
@@ -2376,6 +2458,15 @@ extern void ext4_unwritten_wait(struct inode *inode);
2376extern const struct inode_operations ext4_dir_inode_operations; 2458extern const struct inode_operations ext4_dir_inode_operations;
2377extern const struct inode_operations ext4_special_inode_operations; 2459extern const struct inode_operations ext4_special_inode_operations;
2378extern struct dentry *ext4_get_parent(struct dentry *child); 2460extern struct dentry *ext4_get_parent(struct dentry *child);
2461extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2462 struct ext4_dir_entry_2 *de,
2463 int blocksize, int csum_size,
2464 unsigned int parent_ino, int dotdot_real_len);
2465extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
2466 unsigned int blocksize);
2467extern int ext4_handle_dirty_dirent_node(handle_t *handle,
2468 struct inode *inode,
2469 struct buffer_head *bh);
2379 2470
2380/* symlink.c */ 2471/* symlink.c */
2381extern const struct inode_operations ext4_symlink_inode_operations; 2472extern const struct inode_operations ext4_symlink_inode_operations;
@@ -2393,6 +2484,9 @@ extern int ext4_check_blockref(const char *, unsigned int,
2393 struct inode *, __le32 *, unsigned int); 2484 struct inode *, __le32 *, unsigned int);
2394 2485
2395/* extents.c */ 2486/* extents.c */
2487struct ext4_ext_path;
2488struct ext4_extent;
2489
2396extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 2490extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
2397extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 2491extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
2398extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, 2492extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
@@ -2410,8 +2504,27 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
2410 ssize_t len); 2504 ssize_t len);
2411extern int ext4_map_blocks(handle_t *handle, struct inode *inode, 2505extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
2412 struct ext4_map_blocks *map, int flags); 2506 struct ext4_map_blocks *map, int flags);
2507extern int ext4_ext_calc_metadata_amount(struct inode *inode,
2508 ext4_lblk_t lblocks);
2509extern int ext4_extent_tree_init(handle_t *, struct inode *);
2510extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
2511 int num,
2512 struct ext4_ext_path *path);
2513extern int ext4_can_extents_be_merged(struct inode *inode,
2514 struct ext4_extent *ex1,
2515 struct ext4_extent *ex2);
2516extern int ext4_ext_insert_extent(handle_t *, struct inode *,
2517 struct ext4_ext_path *,
2518 struct ext4_extent *, int);
2519extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
2520 struct ext4_ext_path *);
2521extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2522extern int ext4_ext_check_inode(struct inode *inode);
2523extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2413extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2524extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2414 __u64 start, __u64 len); 2525 __u64 start, __u64 len);
2526
2527
2415/* move_extent.c */ 2528/* move_extent.c */
2416extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2529extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2417 __u64 start_orig, __u64 start_donor, 2530 __u64 start_orig, __u64 start_donor,
@@ -2445,14 +2558,10 @@ enum ext4_state_bits {
2445 * never, ever appear in a buffer_head's state 2558 * never, ever appear in a buffer_head's state
2446 * flag. See EXT4_MAP_FROM_CLUSTER to see where 2559 * flag. See EXT4_MAP_FROM_CLUSTER to see where
2447 * this is used. */ 2560 * this is used. */
2448 BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
2449 * flag is set when ext4_map_blocks is called on a
2450 * delayed allocated block to get its real mapping. */
2451}; 2561};
2452 2562
2453BUFFER_FNS(Uninit, uninit) 2563BUFFER_FNS(Uninit, uninit)
2454TAS_BUFFER_FNS(Uninit, uninit) 2564TAS_BUFFER_FNS(Uninit, uninit)
2455BUFFER_FNS(Da_Mapped, da_mapped)
2456 2565
2457/* 2566/*
2458 * Add new method to test whether block and inode bitmaps are properly 2567 * Add new method to test whether block and inode bitmaps are properly
@@ -2503,6 +2612,4 @@ extern void ext4_resize_end(struct super_block *sb);
2503 2612
2504#endif /* __KERNEL__ */ 2613#endif /* __KERNEL__ */
2505 2614
2506#include "ext4_extents.h"
2507
2508#endif /* _EXT4_H */ 2615#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index cb1b2c919963..487fda12bc00 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -43,16 +43,6 @@
43#define CHECK_BINSEARCH__ 43#define CHECK_BINSEARCH__
44 44
45/* 45/*
46 * Turn on EXT_DEBUG to get lots of info about extents operations.
47 */
48#define EXT_DEBUG__
49#ifdef EXT_DEBUG
50#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
51#else
52#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
53#endif
54
55/*
56 * If EXT_STATS is defined then stats numbers are collected. 46 * If EXT_STATS is defined then stats numbers are collected.
57 * These number will be displayed at umount time. 47 * These number will be displayed at umount time.
58 */ 48 */
@@ -144,20 +134,6 @@ struct ext4_ext_path {
144 */ 134 */
145 135
146/* 136/*
147 * to be called by ext4_ext_walk_space()
148 * negative retcode - error
149 * positive retcode - signal for ext4_ext_walk_space(), see below
150 * callback must return valid extent (passed or newly created)
151 */
152typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t,
153 struct ext4_ext_cache *,
154 struct ext4_extent *, void *);
155
156#define EXT_CONTINUE 0
157#define EXT_BREAK 1
158#define EXT_REPEAT 2
159
160/*
161 * Maximum number of logical blocks in a file; ext4_extent's ee_block is 137 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
162 * __le32. 138 * __le32.
163 */ 139 */
@@ -300,21 +276,5 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
300 0xffff); 276 0xffff);
301} 277}
302 278
303extern int ext4_ext_calc_metadata_amount(struct inode *inode,
304 ext4_lblk_t lblocks);
305extern int ext4_extent_tree_init(handle_t *, struct inode *);
306extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
307 int num,
308 struct ext4_ext_path *path);
309extern int ext4_can_extents_be_merged(struct inode *inode,
310 struct ext4_extent *ex1,
311 struct ext4_extent *ex2);
312extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
313extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
314 struct ext4_ext_path *);
315extern void ext4_ext_drop_refs(struct ext4_ext_path *);
316extern int ext4_ext_check_inode(struct inode *inode);
317extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
318 int search_hint_reverse);
319#endif /* _EXT4_EXTENTS */ 279#endif /* _EXT4_EXTENTS */
320 280
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 56d258c18303..7177f9b21cb2 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -254,13 +254,6 @@ static inline void ext4_handle_sync(handle_t *handle)
254 handle->h_sync = 1; 254 handle->h_sync = 1;
255} 255}
256 256
257static inline void ext4_handle_release_buffer(handle_t *handle,
258 struct buffer_head *bh)
259{
260 if (ext4_handle_valid(handle))
261 jbd2_journal_release_buffer(handle, bh);
262}
263
264static inline int ext4_handle_is_aborted(handle_t *handle) 257static inline int ext4_handle_is_aborted(handle_t *handle)
265{ 258{
266 if (ext4_handle_valid(handle)) 259 if (ext4_handle_valid(handle))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7011ac967208..26af22832a84 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -41,6 +41,8 @@
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <linux/fiemap.h> 42#include <linux/fiemap.h>
43#include "ext4_jbd2.h" 43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45#include "xattr.h"
44 46
45#include <trace/events/ext4.h> 47#include <trace/events/ext4.h>
46 48
@@ -109,6 +111,9 @@ static int ext4_split_extent_at(handle_t *handle,
109 int split_flag, 111 int split_flag,
110 int flags); 112 int flags);
111 113
114static int ext4_find_delayed_extent(struct inode *inode,
115 struct ext4_ext_cache *newex);
116
112static int ext4_ext_truncate_extend_restart(handle_t *handle, 117static int ext4_ext_truncate_extend_restart(handle_t *handle,
113 struct inode *inode, 118 struct inode *inode,
114 int needed) 119 int needed)
@@ -1959,27 +1964,33 @@ cleanup:
1959 return err; 1964 return err;
1960} 1965}
1961 1966
1962static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1967static int ext4_fill_fiemap_extents(struct inode *inode,
1963 ext4_lblk_t num, ext_prepare_callback func, 1968 ext4_lblk_t block, ext4_lblk_t num,
1964 void *cbdata) 1969 struct fiemap_extent_info *fieinfo)
1965{ 1970{
1966 struct ext4_ext_path *path = NULL; 1971 struct ext4_ext_path *path = NULL;
1967 struct ext4_ext_cache cbex; 1972 struct ext4_ext_cache newex;
1968 struct ext4_extent *ex; 1973 struct ext4_extent *ex;
1969 ext4_lblk_t next, start = 0, end = 0; 1974 ext4_lblk_t next, next_del, start = 0, end = 0;
1970 ext4_lblk_t last = block + num; 1975 ext4_lblk_t last = block + num;
1971 int depth, exists, err = 0; 1976 int exists, depth = 0, err = 0;
1972 1977 unsigned int flags = 0;
1973 BUG_ON(func == NULL); 1978 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
1974 BUG_ON(inode == NULL);
1975 1979
1976 while (block < last && block != EXT_MAX_BLOCKS) { 1980 while (block < last && block != EXT_MAX_BLOCKS) {
1977 num = last - block; 1981 num = last - block;
1978 /* find extent for this block */ 1982 /* find extent for this block */
1979 down_read(&EXT4_I(inode)->i_data_sem); 1983 down_read(&EXT4_I(inode)->i_data_sem);
1984
1985 if (path && ext_depth(inode) != depth) {
1986 /* depth was changed. we have to realloc path */
1987 kfree(path);
1988 path = NULL;
1989 }
1990
1980 path = ext4_ext_find_extent(inode, block, path); 1991 path = ext4_ext_find_extent(inode, block, path);
1981 up_read(&EXT4_I(inode)->i_data_sem);
1982 if (IS_ERR(path)) { 1992 if (IS_ERR(path)) {
1993 up_read(&EXT4_I(inode)->i_data_sem);
1983 err = PTR_ERR(path); 1994 err = PTR_ERR(path);
1984 path = NULL; 1995 path = NULL;
1985 break; 1996 break;
@@ -1987,13 +1998,16 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1987 1998
1988 depth = ext_depth(inode); 1999 depth = ext_depth(inode);
1989 if (unlikely(path[depth].p_hdr == NULL)) { 2000 if (unlikely(path[depth].p_hdr == NULL)) {
2001 up_read(&EXT4_I(inode)->i_data_sem);
1990 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 2002 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1991 err = -EIO; 2003 err = -EIO;
1992 break; 2004 break;
1993 } 2005 }
1994 ex = path[depth].p_ext; 2006 ex = path[depth].p_ext;
1995 next = ext4_ext_next_allocated_block(path); 2007 next = ext4_ext_next_allocated_block(path);
2008 ext4_ext_drop_refs(path);
1996 2009
2010 flags = 0;
1997 exists = 0; 2011 exists = 0;
1998 if (!ex) { 2012 if (!ex) {
1999 /* there is no extent yet, so try to allocate 2013 /* there is no extent yet, so try to allocate
@@ -2030,40 +2044,64 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
2030 BUG_ON(end <= start); 2044 BUG_ON(end <= start);
2031 2045
2032 if (!exists) { 2046 if (!exists) {
2033 cbex.ec_block = start; 2047 newex.ec_block = start;
2034 cbex.ec_len = end - start; 2048 newex.ec_len = end - start;
2035 cbex.ec_start = 0; 2049 newex.ec_start = 0;
2036 } else { 2050 } else {
2037 cbex.ec_block = le32_to_cpu(ex->ee_block); 2051 newex.ec_block = le32_to_cpu(ex->ee_block);
2038 cbex.ec_len = ext4_ext_get_actual_len(ex); 2052 newex.ec_len = ext4_ext_get_actual_len(ex);
2039 cbex.ec_start = ext4_ext_pblock(ex); 2053 newex.ec_start = ext4_ext_pblock(ex);
2054 if (ext4_ext_is_uninitialized(ex))
2055 flags |= FIEMAP_EXTENT_UNWRITTEN;
2040 } 2056 }
2041 2057
2042 if (unlikely(cbex.ec_len == 0)) { 2058 /*
2043 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); 2059 * Find delayed extent and update newex accordingly. We call
2044 err = -EIO; 2060 * it even in !exists case to find out whether newex is the
2045 break; 2061 * last existing extent or not.
2062 */
2063 next_del = ext4_find_delayed_extent(inode, &newex);
2064 if (!exists && next_del) {
2065 exists = 1;
2066 flags |= FIEMAP_EXTENT_DELALLOC;
2046 } 2067 }
2047 err = func(inode, next, &cbex, ex, cbdata); 2068 up_read(&EXT4_I(inode)->i_data_sem);
2048 ext4_ext_drop_refs(path);
2049 2069
2050 if (err < 0) 2070 if (unlikely(newex.ec_len == 0)) {
2071 EXT4_ERROR_INODE(inode, "newex.ec_len == 0");
2072 err = -EIO;
2051 break; 2073 break;
2074 }
2052 2075
2053 if (err == EXT_REPEAT) 2076 /* This is possible iff next == next_del == EXT_MAX_BLOCKS */
2054 continue; 2077 if (next == next_del) {
2055 else if (err == EXT_BREAK) { 2078 flags |= FIEMAP_EXTENT_LAST;
2056 err = 0; 2079 if (unlikely(next_del != EXT_MAX_BLOCKS ||
2057 break; 2080 next != EXT_MAX_BLOCKS)) {
2081 EXT4_ERROR_INODE(inode,
2082 "next extent == %u, next "
2083 "delalloc extent = %u",
2084 next, next_del);
2085 err = -EIO;
2086 break;
2087 }
2058 } 2088 }
2059 2089
2060 if (ext_depth(inode) != depth) { 2090 if (exists) {
2061 /* depth was changed. we have to realloc path */ 2091 err = fiemap_fill_next_extent(fieinfo,
2062 kfree(path); 2092 (__u64)newex.ec_block << blksize_bits,
2063 path = NULL; 2093 (__u64)newex.ec_start << blksize_bits,
2094 (__u64)newex.ec_len << blksize_bits,
2095 flags);
2096 if (err < 0)
2097 break;
2098 if (err == 1) {
2099 err = 0;
2100 break;
2101 }
2064 } 2102 }
2065 2103
2066 block = cbex.ec_block + cbex.ec_len; 2104 block = newex.ec_block + newex.ec_len;
2067 } 2105 }
2068 2106
2069 if (path) { 2107 if (path) {
@@ -2156,7 +2194,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2156 struct ext4_extent *ex) 2194 struct ext4_extent *ex)
2157{ 2195{
2158 struct ext4_ext_cache *cex; 2196 struct ext4_ext_cache *cex;
2159 struct ext4_sb_info *sbi;
2160 int ret = 0; 2197 int ret = 0;
2161 2198
2162 /* 2199 /*
@@ -2164,7 +2201,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2164 */ 2201 */
2165 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 2202 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2166 cex = &EXT4_I(inode)->i_cached_extent; 2203 cex = &EXT4_I(inode)->i_cached_extent;
2167 sbi = EXT4_SB(inode->i_sb);
2168 2204
2169 /* has cache valid data? */ 2205 /* has cache valid data? */
2170 if (cex->ec_len == 0) 2206 if (cex->ec_len == 0)
@@ -2273,7 +2309,13 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2273int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 2309int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
2274{ 2310{
2275 int index; 2311 int index;
2276 int depth = ext_depth(inode); 2312 int depth;
2313
2314 /* If we are converting the inline data, only one is needed here. */
2315 if (ext4_has_inline_data(inode))
2316 return 1;
2317
2318 depth = ext_depth(inode);
2277 2319
2278 if (chunk) 2320 if (chunk)
2279 index = depth * 2; 2321 index = depth * 2;
@@ -3461,115 +3503,34 @@ out:
3461/** 3503/**
3462 * ext4_find_delalloc_range: find delayed allocated block in the given range. 3504 * ext4_find_delalloc_range: find delayed allocated block in the given range.
3463 * 3505 *
3464 * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns 3506 * Return 1 if there is a delalloc block in the range, otherwise 0.
3465 * whether there are any buffers marked for delayed allocation. It returns '1'
3466 * on the first delalloc'ed buffer head found. If no buffer head in the given
3467 * range is marked for delalloc, it returns 0.
3468 * lblk_start should always be <= lblk_end.
3469 * search_hint_reverse is to indicate that searching in reverse from lblk_end to
3470 * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
3471 * block sooner). This is useful when blocks are truncated sequentially from
3472 * lblk_start towards lblk_end.
3473 */ 3507 */
3474static int ext4_find_delalloc_range(struct inode *inode, 3508static int ext4_find_delalloc_range(struct inode *inode,
3475 ext4_lblk_t lblk_start, 3509 ext4_lblk_t lblk_start,
3476 ext4_lblk_t lblk_end, 3510 ext4_lblk_t lblk_end)
3477 int search_hint_reverse)
3478{ 3511{
3479 struct address_space *mapping = inode->i_mapping; 3512 struct extent_status es;
3480 struct buffer_head *head, *bh = NULL;
3481 struct page *page;
3482 ext4_lblk_t i, pg_lblk;
3483 pgoff_t index;
3484
3485 if (!test_opt(inode->i_sb, DELALLOC))
3486 return 0;
3487
3488 /* reverse search wont work if fs block size is less than page size */
3489 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3490 search_hint_reverse = 0;
3491 3513
3492 if (search_hint_reverse) 3514 es.start = lblk_start;
3493 i = lblk_end; 3515 ext4_es_find_extent(inode, &es);
3516 if (es.len == 0)
3517 return 0; /* there is no delay extent in this tree */
3518 else if (es.start <= lblk_start && lblk_start < es.start + es.len)
3519 return 1;
3520 else if (lblk_start <= es.start && es.start <= lblk_end)
3521 return 1;
3494 else 3522 else
3495 i = lblk_start; 3523 return 0;
3496
3497 index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
3498
3499 while ((i >= lblk_start) && (i <= lblk_end)) {
3500 page = find_get_page(mapping, index);
3501 if (!page)
3502 goto nextpage;
3503
3504 if (!page_has_buffers(page))
3505 goto nextpage;
3506
3507 head = page_buffers(page);
3508 if (!head)
3509 goto nextpage;
3510
3511 bh = head;
3512 pg_lblk = index << (PAGE_CACHE_SHIFT -
3513 inode->i_blkbits);
3514 do {
3515 if (unlikely(pg_lblk < lblk_start)) {
3516 /*
3517 * This is possible when fs block size is less
3518 * than page size and our cluster starts/ends in
3519 * middle of the page. So we need to skip the
3520 * initial few blocks till we reach the 'lblk'
3521 */
3522 pg_lblk++;
3523 continue;
3524 }
3525
3526 /* Check if the buffer is delayed allocated and that it
3527 * is not yet mapped. (when da-buffers are mapped during
3528 * their writeout, their da_mapped bit is set.)
3529 */
3530 if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
3531 page_cache_release(page);
3532 trace_ext4_find_delalloc_range(inode,
3533 lblk_start, lblk_end,
3534 search_hint_reverse,
3535 1, i);
3536 return 1;
3537 }
3538 if (search_hint_reverse)
3539 i--;
3540 else
3541 i++;
3542 } while ((i >= lblk_start) && (i <= lblk_end) &&
3543 ((bh = bh->b_this_page) != head));
3544nextpage:
3545 if (page)
3546 page_cache_release(page);
3547 /*
3548 * Move to next page. 'i' will be the first lblk in the next
3549 * page.
3550 */
3551 if (search_hint_reverse)
3552 index--;
3553 else
3554 index++;
3555 i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
3556 }
3557
3558 trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3559 search_hint_reverse, 0, 0);
3560 return 0;
3561} 3524}
3562 3525
3563int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk, 3526int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
3564 int search_hint_reverse)
3565{ 3527{
3566 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3528 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3567 ext4_lblk_t lblk_start, lblk_end; 3529 ext4_lblk_t lblk_start, lblk_end;
3568 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); 3530 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
3569 lblk_end = lblk_start + sbi->s_cluster_ratio - 1; 3531 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3570 3532
3571 return ext4_find_delalloc_range(inode, lblk_start, lblk_end, 3533 return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
3572 search_hint_reverse);
3573} 3534}
3574 3535
3575/** 3536/**
@@ -3630,7 +3591,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3630 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); 3591 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
3631 lblk_to = lblk_from + c_offset - 1; 3592 lblk_to = lblk_from + c_offset - 1;
3632 3593
3633 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0)) 3594 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
3634 allocated_clusters--; 3595 allocated_clusters--;
3635 } 3596 }
3636 3597
@@ -3640,7 +3601,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3640 lblk_from = lblk_start + num_blks; 3601 lblk_from = lblk_start + num_blks;
3641 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; 3602 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
3642 3603
3643 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0)) 3604 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
3644 allocated_clusters--; 3605 allocated_clusters--;
3645 } 3606 }
3646 3607
@@ -3663,8 +3624,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3663 flags, allocated); 3624 flags, allocated);
3664 ext4_ext_show_leaf(inode, path); 3625 ext4_ext_show_leaf(inode, path);
3665 3626
3666 trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated, 3627 trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
3667 newblock); 3628 allocated, newblock);
3668 3629
3669 /* get_block() before submit the IO, split the extent */ 3630 /* get_block() before submit the IO, split the extent */
3670 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3631 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
@@ -3911,7 +3872,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3911 struct ext4_extent newex, *ex, *ex2; 3872 struct ext4_extent newex, *ex, *ex2;
3912 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3873 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3913 ext4_fsblk_t newblock = 0; 3874 ext4_fsblk_t newblock = 0;
3914 int free_on_err = 0, err = 0, depth, ret; 3875 int free_on_err = 0, err = 0, depth;
3915 unsigned int allocated = 0, offset = 0; 3876 unsigned int allocated = 0, offset = 0;
3916 unsigned int allocated_clusters = 0; 3877 unsigned int allocated_clusters = 0;
3917 struct ext4_allocation_request ar; 3878 struct ext4_allocation_request ar;
@@ -3927,7 +3888,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3927 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3888 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3928 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3889 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3929 if ((sbi->s_cluster_ratio > 1) && 3890 if ((sbi->s_cluster_ratio > 1) &&
3930 ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) 3891 ext4_find_delalloc_cluster(inode, map->m_lblk))
3931 map->m_flags |= EXT4_MAP_FROM_CLUSTER; 3892 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3932 3893
3933 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3894 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -4007,15 +3968,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4007 ee_len, ee_start); 3968 ee_len, ee_start);
4008 goto out; 3969 goto out;
4009 } 3970 }
4010 ret = ext4_ext_handle_uninitialized_extents( 3971 allocated = ext4_ext_handle_uninitialized_extents(
4011 handle, inode, map, path, flags, 3972 handle, inode, map, path, flags,
4012 allocated, newblock); 3973 allocated, newblock);
4013 return ret; 3974 goto out3;
4014 } 3975 }
4015 } 3976 }
4016 3977
4017 if ((sbi->s_cluster_ratio > 1) && 3978 if ((sbi->s_cluster_ratio > 1) &&
4018 ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) 3979 ext4_find_delalloc_cluster(inode, map->m_lblk))
4019 map->m_flags |= EXT4_MAP_FROM_CLUSTER; 3980 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
4020 3981
4021 /* 3982 /*
@@ -4284,8 +4245,8 @@ out2:
4284 kfree(path); 4245 kfree(path);
4285 } 4246 }
4286 4247
4287 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, 4248out3:
4288 newblock, map->m_len, err ? err : allocated); 4249 trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated);
4289 4250
4290 return err ? err : allocated; 4251 return err ? err : allocated;
4291} 4252}
@@ -4344,6 +4305,8 @@ void ext4_ext_truncate(struct inode *inode)
4344 4305
4345 last_block = (inode->i_size + sb->s_blocksize - 1) 4306 last_block = (inode->i_size + sb->s_blocksize - 1)
4346 >> EXT4_BLOCK_SIZE_BITS(sb); 4307 >> EXT4_BLOCK_SIZE_BITS(sb);
4308 err = ext4_es_remove_extent(inode, last_block,
4309 EXT_MAX_BLOCKS - last_block);
4347 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); 4310 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4348 4311
4349 /* In a multi-transaction truncate, we only make the final 4312 /* In a multi-transaction truncate, we only make the final
@@ -4434,6 +4397,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4434 if (mode & FALLOC_FL_PUNCH_HOLE) 4397 if (mode & FALLOC_FL_PUNCH_HOLE)
4435 return ext4_punch_hole(file, offset, len); 4398 return ext4_punch_hole(file, offset, len);
4436 4399
4400 ret = ext4_convert_inline_data(inode);
4401 if (ret)
4402 return ret;
4403
4437 trace_ext4_fallocate_enter(inode, offset, len, mode); 4404 trace_ext4_fallocate_enter(inode, offset, len, mode);
4438 map.m_lblk = offset >> blkbits; 4405 map.m_lblk = offset >> blkbits;
4439 /* 4406 /*
@@ -4572,206 +4539,43 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4572} 4539}
4573 4540
4574/* 4541/*
4575 * Callback function called for each extent to gather FIEMAP information. 4542 * If newex is not existing extent (newex->ec_start equals zero) find
4543 * delayed extent at start of newex and update newex accordingly and
4544 * return start of the next delayed extent.
4545 *
4546 * If newex is existing extent (newex->ec_start is not equal zero)
4547 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
4548 * extent found. Leave newex unmodified.
4576 */ 4549 */
4577static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next, 4550static int ext4_find_delayed_extent(struct inode *inode,
4578 struct ext4_ext_cache *newex, struct ext4_extent *ex, 4551 struct ext4_ext_cache *newex)
4579 void *data)
4580{ 4552{
4581 __u64 logical; 4553 struct extent_status es;
4582 __u64 physical; 4554 ext4_lblk_t next_del;
4583 __u64 length;
4584 __u32 flags = 0;
4585 int ret = 0;
4586 struct fiemap_extent_info *fieinfo = data;
4587 unsigned char blksize_bits;
4588 4555
4589 blksize_bits = inode->i_sb->s_blocksize_bits; 4556 es.start = newex->ec_block;
4590 logical = (__u64)newex->ec_block << blksize_bits; 4557 next_del = ext4_es_find_extent(inode, &es);
4591 4558
4592 if (newex->ec_start == 0) { 4559 if (newex->ec_start == 0) {
4593 /* 4560 /*
4594 * No extent in extent-tree contains block @newex->ec_start, 4561 * No extent in extent-tree contains block @newex->ec_start,
4595 * then the block may stay in 1)a hole or 2)delayed-extent. 4562 * then the block may stay in 1)a hole or 2)delayed-extent.
4596 *
4597 * Holes or delayed-extents are processed as follows.
4598 * 1. lookup dirty pages with specified range in pagecache.
4599 * If no page is got, then there is no delayed-extent and
4600 * return with EXT_CONTINUE.
4601 * 2. find the 1st mapped buffer,
4602 * 3. check if the mapped buffer is both in the request range
4603 * and a delayed buffer. If not, there is no delayed-extent,
4604 * then return.
4605 * 4. a delayed-extent is found, the extent will be collected.
4606 */ 4563 */
4607 ext4_lblk_t end = 0; 4564 if (es.len == 0)
4608 pgoff_t last_offset; 4565 /* A hole found. */
4609 pgoff_t offset; 4566 return 0;
4610 pgoff_t index;
4611 pgoff_t start_index = 0;
4612 struct page **pages = NULL;
4613 struct buffer_head *bh = NULL;
4614 struct buffer_head *head = NULL;
4615 unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
4616
4617 pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
4618 if (pages == NULL)
4619 return -ENOMEM;
4620
4621 offset = logical >> PAGE_SHIFT;
4622repeat:
4623 last_offset = offset;
4624 head = NULL;
4625 ret = find_get_pages_tag(inode->i_mapping, &offset,
4626 PAGECACHE_TAG_DIRTY, nr_pages, pages);
4627
4628 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
4629 /* First time, try to find a mapped buffer. */
4630 if (ret == 0) {
4631out:
4632 for (index = 0; index < ret; index++)
4633 page_cache_release(pages[index]);
4634 /* just a hole. */
4635 kfree(pages);
4636 return EXT_CONTINUE;
4637 }
4638 index = 0;
4639
4640next_page:
4641 /* Try to find the 1st mapped buffer. */
4642 end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
4643 blksize_bits;
4644 if (!page_has_buffers(pages[index]))
4645 goto out;
4646 head = page_buffers(pages[index]);
4647 if (!head)
4648 goto out;
4649
4650 index++;
4651 bh = head;
4652 do {
4653 if (end >= newex->ec_block +
4654 newex->ec_len)
4655 /* The buffer is out of
4656 * the request range.
4657 */
4658 goto out;
4659
4660 if (buffer_mapped(bh) &&
4661 end >= newex->ec_block) {
4662 start_index = index - 1;
4663 /* get the 1st mapped buffer. */
4664 goto found_mapped_buffer;
4665 }
4666
4667 bh = bh->b_this_page;
4668 end++;
4669 } while (bh != head);
4670
4671 /* No mapped buffer in the range found in this page,
4672 * We need to look up next page.
4673 */
4674 if (index >= ret) {
4675 /* There is no page left, but we need to limit
4676 * newex->ec_len.
4677 */
4678 newex->ec_len = end - newex->ec_block;
4679 goto out;
4680 }
4681 goto next_page;
4682 } else {
4683 /*Find contiguous delayed buffers. */
4684 if (ret > 0 && pages[0]->index == last_offset)
4685 head = page_buffers(pages[0]);
4686 bh = head;
4687 index = 1;
4688 start_index = 0;
4689 }
4690
4691found_mapped_buffer:
4692 if (bh != NULL && buffer_delay(bh)) {
4693 /* 1st or contiguous delayed buffer found. */
4694 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
4695 /*
4696 * 1st delayed buffer found, record
4697 * the start of extent.
4698 */
4699 flags |= FIEMAP_EXTENT_DELALLOC;
4700 newex->ec_block = end;
4701 logical = (__u64)end << blksize_bits;
4702 }
4703 /* Find contiguous delayed buffers. */
4704 do {
4705 if (!buffer_delay(bh))
4706 goto found_delayed_extent;
4707 bh = bh->b_this_page;
4708 end++;
4709 } while (bh != head);
4710
4711 for (; index < ret; index++) {
4712 if (!page_has_buffers(pages[index])) {
4713 bh = NULL;
4714 break;
4715 }
4716 head = page_buffers(pages[index]);
4717 if (!head) {
4718 bh = NULL;
4719 break;
4720 }
4721
4722 if (pages[index]->index !=
4723 pages[start_index]->index + index
4724 - start_index) {
4725 /* Blocks are not contiguous. */
4726 bh = NULL;
4727 break;
4728 }
4729 bh = head;
4730 do {
4731 if (!buffer_delay(bh))
4732 /* Delayed-extent ends. */
4733 goto found_delayed_extent;
4734 bh = bh->b_this_page;
4735 end++;
4736 } while (bh != head);
4737 }
4738 } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
4739 /* a hole found. */
4740 goto out;
4741 4567
4742found_delayed_extent: 4568 if (es.start > newex->ec_block) {
4743 newex->ec_len = min(end - newex->ec_block, 4569 /* A hole found. */
4744 (ext4_lblk_t)EXT_INIT_MAX_LEN); 4570 newex->ec_len = min(es.start - newex->ec_block,
4745 if (ret == nr_pages && bh != NULL && 4571 newex->ec_len);
4746 newex->ec_len < EXT_INIT_MAX_LEN && 4572 return 0;
4747 buffer_delay(bh)) {
4748 /* Have not collected an extent and continue. */
4749 for (index = 0; index < ret; index++)
4750 page_cache_release(pages[index]);
4751 goto repeat;
4752 } 4573 }
4753 4574
4754 for (index = 0; index < ret; index++) 4575 newex->ec_len = es.start + es.len - newex->ec_block;
4755 page_cache_release(pages[index]);
4756 kfree(pages);
4757 } 4576 }
4758 4577
4759 physical = (__u64)newex->ec_start << blksize_bits; 4578 return next_del;
4760 length = (__u64)newex->ec_len << blksize_bits;
4761
4762 if (ex && ext4_ext_is_uninitialized(ex))
4763 flags |= FIEMAP_EXTENT_UNWRITTEN;
4764
4765 if (next == EXT_MAX_BLOCKS)
4766 flags |= FIEMAP_EXTENT_LAST;
4767
4768 ret = fiemap_fill_next_extent(fieinfo, logical, physical,
4769 length, flags);
4770 if (ret < 0)
4771 return ret;
4772 if (ret == 1)
4773 return EXT_BREAK;
4774 return EXT_CONTINUE;
4775} 4579}
4776/* fiemap flags we can handle specified here */ 4580/* fiemap flags we can handle specified here */
4777#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 4581#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -4971,6 +4775,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4971 ext4_ext_invalidate_cache(inode); 4775 ext4_ext_invalidate_cache(inode);
4972 ext4_discard_preallocations(inode); 4776 ext4_discard_preallocations(inode);
4973 4777
4778 err = ext4_es_remove_extent(inode, first_block,
4779 stop_block - first_block);
4974 err = ext4_ext_remove_space(inode, first_block, stop_block - 1); 4780 err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
4975 4781
4976 ext4_ext_invalidate_cache(inode); 4782 ext4_ext_invalidate_cache(inode);
@@ -4991,12 +4797,22 @@ out_mutex:
4991 mutex_unlock(&inode->i_mutex); 4797 mutex_unlock(&inode->i_mutex);
4992 return err; 4798 return err;
4993} 4799}
4800
4994int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4801int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4995 __u64 start, __u64 len) 4802 __u64 start, __u64 len)
4996{ 4803{
4997 ext4_lblk_t start_blk; 4804 ext4_lblk_t start_blk;
4998 int error = 0; 4805 int error = 0;
4999 4806
4807 if (ext4_has_inline_data(inode)) {
4808 int has_inline = 1;
4809
4810 error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
4811
4812 if (has_inline)
4813 return error;
4814 }
4815
5000 /* fallback to generic here if not in extents fmt */ 4816 /* fallback to generic here if not in extents fmt */
5001 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4817 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
5002 return generic_block_fiemap(inode, fieinfo, start, len, 4818 return generic_block_fiemap(inode, fieinfo, start, len,
@@ -5018,11 +4834,11 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
5018 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 4834 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
5019 4835
5020 /* 4836 /*
5021 * Walk the extent tree gathering extent information. 4837 * Walk the extent tree gathering extent information
5022 * ext4_ext_fiemap_cb will push extents back to user. 4838 * and pushing extents back to the user.
5023 */ 4839 */
5024 error = ext4_ext_walk_space(inode, start_blk, len_blks, 4840 error = ext4_fill_fiemap_extents(inode, start_blk,
5025 ext4_ext_fiemap_cb, fieinfo); 4841 len_blks, fieinfo);
5026 } 4842 }
5027 4843
5028 return error; 4844 return error;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
new file mode 100644
index 000000000000..564d981a2fcc
--- /dev/null
+++ b/fs/ext4/extents_status.c
@@ -0,0 +1,500 @@
1/*
2 * fs/ext4/extents_status.c
3 *
4 * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
5 * Modified by
6 * Allison Henderson <achender@linux.vnet.ibm.com>
7 * Hugh Dickins <hughd@google.com>
8 * Zheng Liu <wenqing.lz@taobao.com>
9 *
10 * Ext4 extents status tree core functions.
11 */
12#include <linux/rbtree.h>
13#include "ext4.h"
14#include "extents_status.h"
15#include "ext4_extents.h"
16
17#include <trace/events/ext4.h>
18
19/*
20 * According to previous discussion in Ext4 Developer Workshop, we
21 * will introduce a new structure called io tree to track all extent
22 * status in order to solve some problems that we have met
23 * (e.g. Reservation space warning), and provide extent-level locking.
24 * Delay extent tree is the first step to achieve this goal. It is
25 * original built by Yongqiang Yang. At that time it is called delay
26 * extent tree, whose goal is only track delay extent in memory to
27 * simplify the implementation of fiemap and bigalloc, and introduce
28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called
29 * delay extent tree in the following comment. But to better
30 * understand what it does, it has been renamed to extent status tree.
31 *
32 * Currently the first step has been done. All delay extents are
33 * tracked in the tree. It maintains the delay extent when a delay
34 * allocation is issued, and the delay extent is written out or
35 * invalidated. Therefore the implementation of fiemap and bigalloc
36 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
37 *
38 * The following comment describes the implementation of extent
39 * status tree and future works.
40 */
41
42/*
43 * extents status tree implementation for ext4.
44 *
45 *
46 * ==========================================================================
47 * Extents status encompass delayed extents and extent locks
48 *
49 * 1. Why delayed extent implementation ?
50 *
51 * Without delayed extent, ext4 identifies a delayed extent by looking
52 * up page cache, this has several deficiencies - complicated, buggy,
53 * and inefficient code.
54 *
55 * FIEMAP, SEEK_HOLE/DATA, bigalloc, punch hole and writeout all need
56 * to know if a block or a range of blocks are belonged to a delayed
57 * extent.
58 *
59 * Let us have a look at how they do without delayed extents implementation.
60 * -- FIEMAP
61 * FIEMAP looks up page cache to identify delayed allocations from holes.
62 *
63 * -- SEEK_HOLE/DATA
64 * SEEK_HOLE/DATA has the same problem as FIEMAP.
65 *
66 * -- bigalloc
67 * bigalloc looks up page cache to figure out if a block is
68 * already under delayed allocation or not to determine whether
69 * quota reserving is needed for the cluster.
70 *
71 * -- punch hole
72 * punch hole looks up page cache to identify a delayed extent.
73 *
74 * -- writeout
75 * Writeout looks up whole page cache to see if a buffer is
76 * mapped, If there are not very many delayed buffers, then it is
77 * time consuming.
78 *
79 * With delayed extents implementation, FIEMAP, SEEK_HOLE/DATA,
80 * bigalloc and writeout can figure out if a block or a range of
81 * blocks is under delayed allocation(belonged to a delayed extent) or
82 * not by searching the delayed extent tree.
83 *
84 *
85 * ==========================================================================
86 * 2. ext4 delayed extents implementation
87 *
88 * -- delayed extent
89 * A delayed extent is a range of blocks which are contiguous
90 * logically and under delayed allocation. Unlike extent in
91 * ext4, delayed extent in ext4 is a in-memory struct, there is
92 * no corresponding on-disk data. There is no limit on length of
93 * delayed extent, so a delayed extent can contain as many blocks
94 * as they are contiguous logically.
95 *
96 * -- delayed extent tree
97 * Every inode has a delayed extent tree and all under delayed
98 * allocation blocks are added to the tree as delayed extents.
99 * Delayed extents in the tree are ordered by logical block no.
100 *
101 * -- operations on a delayed extent tree
102 * There are three operations on a delayed extent tree: find next
103 * delayed extent, adding a space(a range of blocks) and removing
104 * a space.
105 *
106 * -- race on a delayed extent tree
107 * Delayed extent tree is protected inode->i_es_lock.
108 *
109 *
110 * ==========================================================================
111 * 3. performance analysis
112 * -- overhead
113 * 1. There is a cache extent for write access, so if writes are
114 * not very random, adding space operaions are in O(1) time.
115 *
116 * -- gain
117 * 2. Code is much simpler, more readable, more maintainable and
118 * more efficient.
119 *
120 *
121 * ==========================================================================
122 * 4. TODO list
123 * -- Track all extent status
124 *
125 * -- Improve get block process
126 *
127 * -- Extent-level locking
128 */
129
130static struct kmem_cache *ext4_es_cachep;
131
132int __init ext4_init_es(void)
133{
134 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
135 if (ext4_es_cachep == NULL)
136 return -ENOMEM;
137 return 0;
138}
139
140void ext4_exit_es(void)
141{
142 if (ext4_es_cachep)
143 kmem_cache_destroy(ext4_es_cachep);
144}
145
146void ext4_es_init_tree(struct ext4_es_tree *tree)
147{
148 tree->root = RB_ROOT;
149 tree->cache_es = NULL;
150}
151
#ifdef ES_DEBUG__
/* Dump every extent in the inode's status tree (debug builds only). */
static void ext4_es_print_tree(struct inode *inode)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *pos;

	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
	for (pos = rb_first(&tree->root); pos != NULL; pos = rb_next(pos)) {
		struct extent_status *es =
			rb_entry(pos, struct extent_status, rb_node);
		printk(KERN_DEBUG " [%u/%u)", es->start, es->len);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_es_print_tree(inode)
#endif
172
173static inline ext4_lblk_t extent_status_end(struct extent_status *es)
174{
175 BUG_ON(es->start + es->len < es->start);
176 return es->start + es->len - 1;
177}
178
179/*
180 * search through the tree for an delayed extent with a given offset. If
181 * it can't be found, try to find next extent.
182 */
183static struct extent_status *__es_tree_search(struct rb_root *root,
184 ext4_lblk_t offset)
185{
186 struct rb_node *node = root->rb_node;
187 struct extent_status *es = NULL;
188
189 while (node) {
190 es = rb_entry(node, struct extent_status, rb_node);
191 if (offset < es->start)
192 node = node->rb_left;
193 else if (offset > extent_status_end(es))
194 node = node->rb_right;
195 else
196 return es;
197 }
198
199 if (es && offset < es->start)
200 return es;
201
202 if (es && offset > extent_status_end(es)) {
203 node = rb_next(&es->rb_node);
204 return node ? rb_entry(node, struct extent_status, rb_node) :
205 NULL;
206 }
207
208 return NULL;
209}
210
211/*
212 * ext4_es_find_extent: find the 1st delayed extent covering @es->start
213 * if it exists, otherwise, the next extent after @es->start.
214 *
215 * @inode: the inode which owns delayed extents
216 * @es: delayed extent that we found
217 *
218 * Returns the first block of the next extent after es, otherwise
219 * EXT_MAX_BLOCKS if no delay extent is found.
220 * Delayed extent is returned via @es.
221 */
222ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
223{
224 struct ext4_es_tree *tree = NULL;
225 struct extent_status *es1 = NULL;
226 struct rb_node *node;
227 ext4_lblk_t ret = EXT_MAX_BLOCKS;
228
229 trace_ext4_es_find_extent_enter(inode, es->start);
230
231 read_lock(&EXT4_I(inode)->i_es_lock);
232 tree = &EXT4_I(inode)->i_es_tree;
233
234 /* find delay extent in cache firstly */
235 if (tree->cache_es) {
236 es1 = tree->cache_es;
237 if (in_range(es->start, es1->start, es1->len)) {
238 es_debug("%u cached by [%u/%u)\n",
239 es->start, es1->start, es1->len);
240 goto out;
241 }
242 }
243
244 es->len = 0;
245 es1 = __es_tree_search(&tree->root, es->start);
246
247out:
248 if (es1) {
249 tree->cache_es = es1;
250 es->start = es1->start;
251 es->len = es1->len;
252 node = rb_next(&es1->rb_node);
253 if (node) {
254 es1 = rb_entry(node, struct extent_status, rb_node);
255 ret = es1->start;
256 }
257 }
258
259 read_unlock(&EXT4_I(inode)->i_es_lock);
260
261 trace_ext4_es_find_extent_exit(inode, es, ret);
262 return ret;
263}
264
265static struct extent_status *
266ext4_es_alloc_extent(ext4_lblk_t start, ext4_lblk_t len)
267{
268 struct extent_status *es;
269 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
270 if (es == NULL)
271 return NULL;
272 es->start = start;
273 es->len = len;
274 return es;
275}
276
277static void ext4_es_free_extent(struct extent_status *es)
278{
279 kmem_cache_free(ext4_es_cachep, es);
280}
281
282static struct extent_status *
283ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
284{
285 struct extent_status *es1;
286 struct rb_node *node;
287
288 node = rb_prev(&es->rb_node);
289 if (!node)
290 return es;
291
292 es1 = rb_entry(node, struct extent_status, rb_node);
293 if (es->start == extent_status_end(es1) + 1) {
294 es1->len += es->len;
295 rb_erase(&es->rb_node, &tree->root);
296 ext4_es_free_extent(es);
297 es = es1;
298 }
299
300 return es;
301}
302
303static struct extent_status *
304ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
305{
306 struct extent_status *es1;
307 struct rb_node *node;
308
309 node = rb_next(&es->rb_node);
310 if (!node)
311 return es;
312
313 es1 = rb_entry(node, struct extent_status, rb_node);
314 if (es1->start == extent_status_end(es) + 1) {
315 es->len += es1->len;
316 rb_erase(node, &tree->root);
317 ext4_es_free_extent(es1);
318 }
319
320 return es;
321}
322
323static int __es_insert_extent(struct ext4_es_tree *tree, ext4_lblk_t offset,
324 ext4_lblk_t len)
325{
326 struct rb_node **p = &tree->root.rb_node;
327 struct rb_node *parent = NULL;
328 struct extent_status *es;
329 ext4_lblk_t end = offset + len - 1;
330
331 BUG_ON(end < offset);
332 es = tree->cache_es;
333 if (es && offset == (extent_status_end(es) + 1)) {
334 es_debug("cached by [%u/%u)\n", es->start, es->len);
335 es->len += len;
336 es = ext4_es_try_to_merge_right(tree, es);
337 goto out;
338 } else if (es && es->start == end + 1) {
339 es_debug("cached by [%u/%u)\n", es->start, es->len);
340 es->start = offset;
341 es->len += len;
342 es = ext4_es_try_to_merge_left(tree, es);
343 goto out;
344 } else if (es && es->start <= offset &&
345 end <= extent_status_end(es)) {
346 es_debug("cached by [%u/%u)\n", es->start, es->len);
347 goto out;
348 }
349
350 while (*p) {
351 parent = *p;
352 es = rb_entry(parent, struct extent_status, rb_node);
353
354 if (offset < es->start) {
355 if (es->start == end + 1) {
356 es->start = offset;
357 es->len += len;
358 es = ext4_es_try_to_merge_left(tree, es);
359 goto out;
360 }
361 p = &(*p)->rb_left;
362 } else if (offset > extent_status_end(es)) {
363 if (offset == extent_status_end(es) + 1) {
364 es->len += len;
365 es = ext4_es_try_to_merge_right(tree, es);
366 goto out;
367 }
368 p = &(*p)->rb_right;
369 } else {
370 if (extent_status_end(es) <= end)
371 es->len = offset - es->start + len;
372 goto out;
373 }
374 }
375
376 es = ext4_es_alloc_extent(offset, len);
377 if (!es)
378 return -ENOMEM;
379 rb_link_node(&es->rb_node, parent, p);
380 rb_insert_color(&es->rb_node, &tree->root);
381
382out:
383 tree->cache_es = es;
384 return 0;
385}
386
387/*
388 * ext4_es_insert_extent() adds a space to a delayed extent tree.
389 * Caller holds inode->i_es_lock.
390 *
391 * ext4_es_insert_extent is called by ext4_da_write_begin and
392 * ext4_es_remove_extent.
393 *
394 * Return 0 on success, error code on failure.
395 */
396int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t offset,
397 ext4_lblk_t len)
398{
399 struct ext4_es_tree *tree;
400 int err = 0;
401
402 trace_ext4_es_insert_extent(inode, offset, len);
403 es_debug("add [%u/%u) to extent status tree of inode %lu\n",
404 offset, len, inode->i_ino);
405
406 write_lock(&EXT4_I(inode)->i_es_lock);
407 tree = &EXT4_I(inode)->i_es_tree;
408 err = __es_insert_extent(tree, offset, len);
409 write_unlock(&EXT4_I(inode)->i_es_lock);
410
411 ext4_es_print_tree(inode);
412
413 return err;
414}
415
416/*
417 * ext4_es_remove_extent() removes a space from a delayed extent tree.
418 * Caller holds inode->i_es_lock.
419 *
420 * Return 0 on success, error code on failure.
421 */
422int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
423 ext4_lblk_t len)
424{
425 struct rb_node *node;
426 struct ext4_es_tree *tree;
427 struct extent_status *es;
428 struct extent_status orig_es;
429 ext4_lblk_t len1, len2, end;
430 int err = 0;
431
432 trace_ext4_es_remove_extent(inode, offset, len);
433 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
434 offset, len, inode->i_ino);
435
436 end = offset + len - 1;
437 BUG_ON(end < offset);
438 write_lock(&EXT4_I(inode)->i_es_lock);
439 tree = &EXT4_I(inode)->i_es_tree;
440 es = __es_tree_search(&tree->root, offset);
441 if (!es)
442 goto out;
443 if (es->start > end)
444 goto out;
445
446 /* Simply invalidate cache_es. */
447 tree->cache_es = NULL;
448
449 orig_es.start = es->start;
450 orig_es.len = es->len;
451 len1 = offset > es->start ? offset - es->start : 0;
452 len2 = extent_status_end(es) > end ?
453 extent_status_end(es) - end : 0;
454 if (len1 > 0)
455 es->len = len1;
456 if (len2 > 0) {
457 if (len1 > 0) {
458 err = __es_insert_extent(tree, end + 1, len2);
459 if (err) {
460 es->start = orig_es.start;
461 es->len = orig_es.len;
462 goto out;
463 }
464 } else {
465 es->start = end + 1;
466 es->len = len2;
467 }
468 goto out;
469 }
470
471 if (len1 > 0) {
472 node = rb_next(&es->rb_node);
473 if (node)
474 es = rb_entry(node, struct extent_status, rb_node);
475 else
476 es = NULL;
477 }
478
479 while (es && extent_status_end(es) <= end) {
480 node = rb_next(&es->rb_node);
481 rb_erase(&es->rb_node, &tree->root);
482 ext4_es_free_extent(es);
483 if (!node) {
484 es = NULL;
485 break;
486 }
487 es = rb_entry(node, struct extent_status, rb_node);
488 }
489
490 if (es && es->start < end + 1) {
491 len1 = extent_status_end(es) - end;
492 es->start = end + 1;
493 es->len = len1;
494 }
495
496out:
497 write_unlock(&EXT4_I(inode)->i_es_lock);
498 ext4_es_print_tree(inode);
499 return err;
500}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
new file mode 100644
index 000000000000..077f82db092a
--- /dev/null
+++ b/fs/ext4/extents_status.h
@@ -0,0 +1,45 @@
1/*
2 * fs/ext4/extents_status.h
3 *
4 * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
5 * Modified by
6 * Allison Henderson <achender@linux.vnet.ibm.com>
7 * Zheng Liu <wenqing.lz@taobao.com>
8 *
9 */
10
11#ifndef _EXT4_EXTENTS_STATUS_H
12#define _EXT4_EXTENTS_STATUS_H
13
14/*
15 * Turn on ES_DEBUG__ to get lots of info about extent status operations.
16 */
17#ifdef ES_DEBUG__
18#define es_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
19#else
20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
21#endif
22
23struct extent_status {
24 struct rb_node rb_node;
25 ext4_lblk_t start; /* first block extent covers */
26 ext4_lblk_t len; /* length of extent in block */
27};
28
29struct ext4_es_tree {
30 struct rb_root root;
31 struct extent_status *cache_es; /* recently accessed extent */
32};
33
34extern int __init ext4_init_es(void);
35extern void ext4_exit_es(void);
36extern void ext4_es_init_tree(struct ext4_es_tree *tree);
37
38extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t start,
39 ext4_lblk_t len);
40extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t start,
41 ext4_lblk_t len);
42extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
43 struct extent_status *es);
44
45#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bf3966bccd34..b64a60bf105a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/path.h> 25#include <linux/path.h>
26#include <linux/quotaops.h> 26#include <linux/quotaops.h>
27#include <linux/pagevec.h>
27#include "ext4.h" 28#include "ext4.h"
28#include "ext4_jbd2.h" 29#include "ext4_jbd2.h"
29#include "xattr.h" 30#include "xattr.h"
@@ -286,6 +287,324 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
286} 287}
287 288
288/* 289/*
290 * Here we use ext4_map_blocks() to get a block mapping for a extent-based
291 * file rather than ext4_ext_walk_space() because we can introduce
292 * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
293 * function. When extent status tree has been fully implemented, it will
294 * track all extent status for a file and we can directly use it to
295 * retrieve the offset for SEEK_DATA/SEEK_HOLE.
296 */
297
298/*
299 * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
300 * lookup page cache to check whether or not there has some data between
301 * [startoff, endoff] because, if this range contains an unwritten extent,
302 * we determine this extent as a data or a hole according to whether the
303 * page cache has data or not.
304 */
305static int ext4_find_unwritten_pgoff(struct inode *inode,
306 int origin,
307 struct ext4_map_blocks *map,
308 loff_t *offset)
309{
310 struct pagevec pvec;
311 unsigned int blkbits;
312 pgoff_t index;
313 pgoff_t end;
314 loff_t endoff;
315 loff_t startoff;
316 loff_t lastoff;
317 int found = 0;
318
319 blkbits = inode->i_sb->s_blocksize_bits;
320 startoff = *offset;
321 lastoff = startoff;
322 endoff = (map->m_lblk + map->m_len) << blkbits;
323
324 index = startoff >> PAGE_CACHE_SHIFT;
325 end = endoff >> PAGE_CACHE_SHIFT;
326
327 pagevec_init(&pvec, 0);
328 do {
329 int i, num;
330 unsigned long nr_pages;
331
332 num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
333 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
334 (pgoff_t)num);
335 if (nr_pages == 0) {
336 if (origin == SEEK_DATA)
337 break;
338
339 BUG_ON(origin != SEEK_HOLE);
340 /*
341 * If this is the first time to go into the loop and
342 * offset is not beyond the end offset, it will be a
343 * hole at this offset
344 */
345 if (lastoff == startoff || lastoff < endoff)
346 found = 1;
347 break;
348 }
349
350 /*
351 * If this is the first time to go into the loop and
352 * offset is smaller than the first page offset, it will be a
353 * hole at this offset.
354 */
355 if (lastoff == startoff && origin == SEEK_HOLE &&
356 lastoff < page_offset(pvec.pages[0])) {
357 found = 1;
358 break;
359 }
360
361 for (i = 0; i < nr_pages; i++) {
362 struct page *page = pvec.pages[i];
363 struct buffer_head *bh, *head;
364
365 /*
366 * If the current offset is not beyond the end of given
367 * range, it will be a hole.
368 */
369 if (lastoff < endoff && origin == SEEK_HOLE &&
370 page->index > end) {
371 found = 1;
372 *offset = lastoff;
373 goto out;
374 }
375
376 lock_page(page);
377
378 if (unlikely(page->mapping != inode->i_mapping)) {
379 unlock_page(page);
380 continue;
381 }
382
383 if (!page_has_buffers(page)) {
384 unlock_page(page);
385 continue;
386 }
387
388 if (page_has_buffers(page)) {
389 lastoff = page_offset(page);
390 bh = head = page_buffers(page);
391 do {
392 if (buffer_uptodate(bh) ||
393 buffer_unwritten(bh)) {
394 if (origin == SEEK_DATA)
395 found = 1;
396 } else {
397 if (origin == SEEK_HOLE)
398 found = 1;
399 }
400 if (found) {
401 *offset = max_t(loff_t,
402 startoff, lastoff);
403 unlock_page(page);
404 goto out;
405 }
406 lastoff += bh->b_size;
407 bh = bh->b_this_page;
408 } while (bh != head);
409 }
410
411 lastoff = page_offset(page) + PAGE_SIZE;
412 unlock_page(page);
413 }
414
415 /*
416 * The no. of pages is less than our desired, that would be a
417 * hole in there.
418 */
419 if (nr_pages < num && origin == SEEK_HOLE) {
420 found = 1;
421 *offset = lastoff;
422 break;
423 }
424
425 index = pvec.pages[i - 1]->index + 1;
426 pagevec_release(&pvec);
427 } while (index <= end);
428
429out:
430 pagevec_release(&pvec);
431 return found;
432}
433
434/*
435 * ext4_seek_data() retrieves the offset for SEEK_DATA.
436 */
437static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
438{
439 struct inode *inode = file->f_mapping->host;
440 struct ext4_map_blocks map;
441 struct extent_status es;
442 ext4_lblk_t start, last, end;
443 loff_t dataoff, isize;
444 int blkbits;
445 int ret = 0;
446
447 mutex_lock(&inode->i_mutex);
448
449 isize = i_size_read(inode);
450 if (offset >= isize) {
451 mutex_unlock(&inode->i_mutex);
452 return -ENXIO;
453 }
454
455 blkbits = inode->i_sb->s_blocksize_bits;
456 start = offset >> blkbits;
457 last = start;
458 end = isize >> blkbits;
459 dataoff = offset;
460
461 do {
462 map.m_lblk = last;
463 map.m_len = end - last + 1;
464 ret = ext4_map_blocks(NULL, inode, &map, 0);
465 if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
466 if (last != start)
467 dataoff = last << blkbits;
468 break;
469 }
470
471 /*
472 * If there is a delay extent at this offset,
473 * it will be as a data.
474 */
475 es.start = last;
476 (void)ext4_es_find_extent(inode, &es);
477 if (last >= es.start &&
478 last < es.start + es.len) {
479 if (last != start)
480 dataoff = last << blkbits;
481 break;
482 }
483
484 /*
485 * If there is a unwritten extent at this offset,
486 * it will be as a data or a hole according to page
487 * cache that has data or not.
488 */
489 if (map.m_flags & EXT4_MAP_UNWRITTEN) {
490 int unwritten;
491 unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
492 &map, &dataoff);
493 if (unwritten)
494 break;
495 }
496
497 last++;
498 dataoff = last << blkbits;
499 } while (last <= end);
500
501 mutex_unlock(&inode->i_mutex);
502
503 if (dataoff > isize)
504 return -ENXIO;
505
506 if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
507 return -EINVAL;
508 if (dataoff > maxsize)
509 return -EINVAL;
510
511 if (dataoff != file->f_pos) {
512 file->f_pos = dataoff;
513 file->f_version = 0;
514 }
515
516 return dataoff;
517}
518
519/*
520 * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
521 */
522static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
523{
524 struct inode *inode = file->f_mapping->host;
525 struct ext4_map_blocks map;
526 struct extent_status es;
527 ext4_lblk_t start, last, end;
528 loff_t holeoff, isize;
529 int blkbits;
530 int ret = 0;
531
532 mutex_lock(&inode->i_mutex);
533
534 isize = i_size_read(inode);
535 if (offset >= isize) {
536 mutex_unlock(&inode->i_mutex);
537 return -ENXIO;
538 }
539
540 blkbits = inode->i_sb->s_blocksize_bits;
541 start = offset >> blkbits;
542 last = start;
543 end = isize >> blkbits;
544 holeoff = offset;
545
546 do {
547 map.m_lblk = last;
548 map.m_len = end - last + 1;
549 ret = ext4_map_blocks(NULL, inode, &map, 0);
550 if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
551 last += ret;
552 holeoff = last << blkbits;
553 continue;
554 }
555
556 /*
557 * If there is a delay extent at this offset,
558 * we will skip this extent.
559 */
560 es.start = last;
561 (void)ext4_es_find_extent(inode, &es);
562 if (last >= es.start &&
563 last < es.start + es.len) {
564 last = es.start + es.len;
565 holeoff = last << blkbits;
566 continue;
567 }
568
569 /*
570 * If there is a unwritten extent at this offset,
571 * it will be as a data or a hole according to page
572 * cache that has data or not.
573 */
574 if (map.m_flags & EXT4_MAP_UNWRITTEN) {
575 int unwritten;
576 unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
577 &map, &holeoff);
578 if (!unwritten) {
579 last += ret;
580 holeoff = last << blkbits;
581 continue;
582 }
583 }
584
585 /* find a hole */
586 break;
587 } while (last <= end);
588
589 mutex_unlock(&inode->i_mutex);
590
591 if (holeoff > isize)
592 holeoff = isize;
593
594 if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
595 return -EINVAL;
596 if (holeoff > maxsize)
597 return -EINVAL;
598
599 if (holeoff != file->f_pos) {
600 file->f_pos = holeoff;
601 file->f_version = 0;
602 }
603
604 return holeoff;
605}
606
607/*
289 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values 608 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
290 * by calling generic_file_llseek_size() with the appropriate maxbytes 609 * by calling generic_file_llseek_size() with the appropriate maxbytes
291 * value for each. 610 * value for each.
@@ -300,8 +619,19 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
300 else 619 else
301 maxbytes = inode->i_sb->s_maxbytes; 620 maxbytes = inode->i_sb->s_maxbytes;
302 621
303 return generic_file_llseek_size(file, offset, origin, 622 switch (origin) {
304 maxbytes, i_size_read(inode)); 623 case SEEK_SET:
624 case SEEK_CUR:
625 case SEEK_END:
626 return generic_file_llseek_size(file, offset, origin,
627 maxbytes, i_size_read(inode));
628 case SEEK_DATA:
629 return ext4_seek_data(file, offset, maxbytes);
630 case SEEK_HOLE:
631 return ext4_seek_hole(file, offset, maxbytes);
632 }
633
634 return -EINVAL;
305} 635}
306 636
307const struct file_operations ext4_file_operations = { 637const struct file_operations ext4_file_operations = {
@@ -326,12 +656,10 @@ const struct file_operations ext4_file_operations = {
326const struct inode_operations ext4_file_inode_operations = { 656const struct inode_operations ext4_file_inode_operations = {
327 .setattr = ext4_setattr, 657 .setattr = ext4_setattr,
328 .getattr = ext4_getattr, 658 .getattr = ext4_getattr,
329#ifdef CONFIG_EXT4_FS_XATTR
330 .setxattr = generic_setxattr, 659 .setxattr = generic_setxattr,
331 .getxattr = generic_getxattr, 660 .getxattr = generic_getxattr,
332 .listxattr = ext4_listxattr, 661 .listxattr = ext4_listxattr,
333 .removexattr = generic_removexattr, 662 .removexattr = generic_removexattr,
334#endif
335 .get_acl = ext4_get_acl, 663 .get_acl = ext4_get_acl,
336 .fiemap = ext4_fiemap, 664 .fiemap = ext4_fiemap,
337}; 665};
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index be1d89f385b4..dfbc1fe96674 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -44,7 +44,6 @@
44 */ 44 */
45static int ext4_sync_parent(struct inode *inode) 45static int ext4_sync_parent(struct inode *inode)
46{ 46{
47 struct writeback_control wbc;
48 struct dentry *dentry = NULL; 47 struct dentry *dentry = NULL;
49 struct inode *next; 48 struct inode *next;
50 int ret = 0; 49 int ret = 0;
@@ -66,10 +65,7 @@ static int ext4_sync_parent(struct inode *inode)
66 ret = sync_mapping_buffers(inode->i_mapping); 65 ret = sync_mapping_buffers(inode->i_mapping);
67 if (ret) 66 if (ret)
68 break; 67 break;
69 memset(&wbc, 0, sizeof(wbc)); 68 ret = sync_inode_metadata(inode, 1);
70 wbc.sync_mode = WB_SYNC_ALL;
71 wbc.nr_to_write = 0; /* only write out the inode */
72 ret = sync_inode(inode, &wbc);
73 if (ret) 69 if (ret)
74 break; 70 break;
75 } 71 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3a100e7a62a8..3f32c8012447 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -762,7 +762,6 @@ got:
762 762
763 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); 763 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
764 err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); 764 err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
765 brelse(block_bitmap_bh);
766 765
767 /* recheck and clear flag under lock if we still need to */ 766 /* recheck and clear flag under lock if we still need to */
768 ext4_lock_group(sb, group); 767 ext4_lock_group(sb, group);
@@ -775,6 +774,7 @@ got:
775 ext4_group_desc_csum_set(sb, group, gdp); 774 ext4_group_desc_csum_set(sb, group, gdp);
776 } 775 }
777 ext4_unlock_group(sb, group); 776 ext4_unlock_group(sb, group);
777 brelse(block_bitmap_bh);
778 778
779 if (err) 779 if (err)
780 goto fail; 780 goto fail;
@@ -902,6 +902,10 @@ got:
902 902
903 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 903 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
904 904
905 ei->i_inline_off = 0;
906 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
907 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
908
905 ret = inode; 909 ret = inode;
906 dquot_initialize(inode); 910 dquot_initialize(inode);
907 err = dquot_alloc_inode(inode); 911 err = dquot_alloc_inode(inode);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 792e388e7b44..20862f96e8ae 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -22,6 +22,7 @@
22 22
23#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
24#include "truncate.h" 24#include "truncate.h"
25#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
25 26
26#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
27 28
@@ -755,8 +756,7 @@ cleanup:
755 partial--; 756 partial--;
756 } 757 }
757out: 758out:
758 trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, 759 trace_ext4_ind_map_blocks_exit(inode, map, err);
759 map->m_pblk, map->m_len, err);
760 return err; 760 return err;
761} 761}
762 762
@@ -1412,6 +1412,7 @@ void ext4_ind_truncate(struct inode *inode)
1412 down_write(&ei->i_data_sem); 1412 down_write(&ei->i_data_sem);
1413 1413
1414 ext4_discard_preallocations(inode); 1414 ext4_discard_preallocations(inode);
1415 ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
1415 1416
1416 /* 1417 /*
1417 * The orphan list entry will now protect us from any crash which 1418 * The orphan list entry will now protect us from any crash which
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
new file mode 100644
index 000000000000..387c47c6cda9
--- /dev/null
+++ b/fs/ext4/inline.c
@@ -0,0 +1,1884 @@
1/*
2 * Copyright (c) 2012 Taobao.
3 * Written by Tao Ma <boyu.mt@taobao.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14#include "ext4_jbd2.h"
15#include "ext4.h"
16#include "xattr.h"
17#include "truncate.h"
18#include <linux/fiemap.h>
19
20#define EXT4_XATTR_SYSTEM_DATA "data"
21#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
22#define EXT4_INLINE_DOTDOT_SIZE 4
23
24int ext4_get_inline_size(struct inode *inode)
25{
26 if (EXT4_I(inode)->i_inline_off)
27 return EXT4_I(inode)->i_inline_size;
28
29 return 0;
30}
31
/*
 * Compute how many bytes of in-inode xattr value space are (or could be
 * made) available for the "system.data" entry that backs inline data
 * beyond the i_block area.  Caller holds xattr_sem.
 */
static int get_max_inline_xattr_value_size(struct inode *inode,
					   struct ext4_iloc *iloc)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_entry *entry;
	struct ext4_inode *raw_inode;
	int free, min_offs;

	/* Total in-inode xattr space: everything past the fixed inode
	 * fields, the extra isize area and the ibody header. */
	min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
			EXT4_GOOD_OLD_INODE_SIZE -
			EXT4_I(inode)->i_extra_isize -
			sizeof(struct ext4_xattr_ibody_header);

	/*
	 * We need to subtract another sizeof(__u32) since an in-inode xattr
	 * needs an empty 4 bytes to indicate the gap between the xattr entry
	 * and the name/value pair.
	 */
	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		return EXT4_XATTR_SIZE(min_offs -
			EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) -
			EXT4_XATTR_ROUND - sizeof(__u32));

	raw_inode = ext4_raw_inode(iloc);
	header = IHDR(inode, raw_inode);
	entry = IFIRST(header);

	/* Compute min_offs. */
	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
		if (!entry->e_value_block && entry->e_value_size) {
			size_t offs = le16_to_cpu(entry->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}
	/* Gap between the last entry descriptor and the lowest value. */
	free = min_offs -
		((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);

	if (EXT4_I(inode)->i_inline_off) {
		/* The system.data entry already exists; its current value
		 * space can be reused, so add it back in. */
		entry = (struct ext4_xattr_entry *)
			((void *)raw_inode + EXT4_I(inode)->i_inline_off);

		free += le32_to_cpu(entry->e_value_size);
		goto out;
	}

	/* Entry not present yet: reserve room for its descriptor. */
	free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA));

	if (free > EXT4_XATTR_ROUND)
		free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND);
	else
		free = 0;

out:
	return free;
}
88
/*
 * Get the maximum size we now can store in an inode.
 * If we can't find the space for a xattr entry, don't use the space
 * of the extents since we have no space to indicate the inline data.
 */
int ext4_get_max_inline_size(struct inode *inode)
{
	int error, max_inline_size;
	struct ext4_iloc iloc;

	/* 128-byte inodes have no room for in-inode xattrs at all. */
	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &iloc);
	if (error) {
		ext4_error_inode(inode, __func__, __LINE__, 0,
				 "can't get inode location %lu",
				 inode->i_ino);
		return 0;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);

	brelse(iloc.bh);

	/* No xattr room means we can't even flag the data as inline. */
	if (!max_inline_size)
		return 0;

	/* The i_block area is always usable on top of the xattr value. */
	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}
121
122int ext4_has_inline_data(struct inode *inode)
123{
124 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
125 EXT4_I(inode)->i_inline_off;
126}
127
/*
 * this function does not take xattr_sem, which is OK because it is
 * currently only used in a code path coming from ext4_iget, before
 * the new inode has been unlocked
 */
int ext4_find_inline_data_nolock(struct inode *inode)
{
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	int error;

	/* No extra isize means there can be no in-inode xattrs. */
	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		/* Cache the entry offset and total inline capacity
		 * (i_block area plus the xattr value). */
		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
					(void *)ext4_raw_inode(&is.iloc));
		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	}
out:
	brelse(is.iloc.bh);
	return error;
}
166
/*
 * Copy up to @len bytes of inline data into @buffer: first from the
 * i_block area, then from the "system.data" xattr value.  Returns the
 * number of bytes actually copied.  Caller holds xattr_sem.
 */
static int ext4_read_inline_data(struct inode *inode, void *buffer,
				 unsigned int len,
				 struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	int cp_len = 0;
	struct ext4_inode *raw_inode;

	if (!len)
		return 0;

	BUG_ON(len > EXT4_I(inode)->i_inline_size);

	/* First chunk lives in the i_block area. */
	cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ?
			len : EXT4_MIN_INLINE_DATA_SIZE;

	raw_inode = ext4_raw_inode(iloc);
	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);

	len -= cp_len;
	buffer += cp_len;

	if (!len)
		goto out;

	/* Remainder comes from the in-inode xattr value. */
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);
	len = min_t(unsigned int, len,
		    (unsigned int)le32_to_cpu(entry->e_value_size));

	memcpy(buffer,
	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
	cp_len += len;

out:
	return cp_len;
}
206
/*
 * write the buffer to the inline inode.
 * If 'create' is set, we don't need to do the extra copy in the xattr
 * value since it is already handled by ext4_xattr_ibody_inline_set.
 * That saves us one memcpy.
 *
 * NOTE(review): this signature has no 'create' argument; the comment
 * above appears to describe ext4_xattr_ibody_inline_set()'s behavior —
 * confirm against the xattr code.
 */
void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
			    void *buffer, loff_t pos, unsigned int len)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int cp_len = 0;

	BUG_ON(!EXT4_I(inode)->i_inline_off);
	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);

	raw_inode = ext4_raw_inode(iloc);
	buffer += pos;

	/* Part (or all) of the range lands in the i_block area. */
	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);

		len -= cp_len;
		buffer += cp_len;
		pos += cp_len;
	}

	if (!len)
		return;

	/* The rest goes into the "system.data" xattr value. */
	pos -= EXT4_MIN_INLINE_DATA_SIZE;
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);

	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
	       buffer, len);
}
248
/*
 * Create the "system.data" xattr entry that marks the inode as holding
 * inline data, reserving @len total bytes (i_block plus xattr value).
 * Caller holds xattr_sem for writing.
 */
static int ext4_create_inline_data(handle_t *handle,
				   struct inode *inode, unsigned len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_journal_get_write_access(handle, is.iloc.bh);
	if (error)
		goto out;

	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
		/* Reserve a zero-filled value for the bytes that don't
		 * fit in i_block. */
		value = EXT4_ZERO_XATTR_VALUE;
		len -= EXT4_MIN_INLINE_DATA_SIZE;
	} else {
		value = "";
		len = 0;
	}

	/* Insert the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUG_ON(!is.s.not_found);

	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
	if (error) {
		if (error == -ENOSPC)
			ext4_clear_inode_state(inode,
					EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	/* Cache the entry location/size and switch the inode over to
	 * inline storage. */
	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	brelse(is.iloc.bh);
	return error;
}
311
312static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
313 unsigned int len)
314{
315 int error;
316 void *value = NULL;
317 struct ext4_xattr_ibody_find is = {
318 .s = { .not_found = -ENODATA, },
319 };
320 struct ext4_xattr_info i = {
321 .name_index = EXT4_XATTR_INDEX_SYSTEM,
322 .name = EXT4_XATTR_SYSTEM_DATA,
323 };
324
325 /* If the old space is ok, write the data directly. */
326 if (len <= EXT4_I(inode)->i_inline_size)
327 return 0;
328
329 error = ext4_get_inode_loc(inode, &is.iloc);
330 if (error)
331 return error;
332
333 error = ext4_xattr_ibody_find(inode, &i, &is);
334 if (error)
335 goto out;
336
337 BUG_ON(is.s.not_found);
338
339 len -= EXT4_MIN_INLINE_DATA_SIZE;
340 value = kzalloc(len, GFP_NOFS);
341 if (!value)
342 goto out;
343
344 error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
345 value, len);
346 if (error == -ENODATA)
347 goto out;
348
349 error = ext4_journal_get_write_access(handle, is.iloc.bh);
350 if (error)
351 goto out;
352
353 /* Update the xttr entry. */
354 i.value = value;
355 i.value_len = len;
356
357 error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
358 if (error)
359 goto out;
360
361 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
362 (void *)ext4_raw_inode(&is.iloc));
363 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
364 le32_to_cpu(is.s.here->e_value_size);
365 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
366 get_bh(is.iloc.bh);
367 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
368
369out:
370 kfree(value);
371 brelse(is.iloc.bh);
372 return error;
373}
374
375int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
376 unsigned int len)
377{
378 int ret, size;
379 struct ext4_inode_info *ei = EXT4_I(inode);
380
381 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
382 return -ENOSPC;
383
384 size = ext4_get_max_inline_size(inode);
385 if (size < len)
386 return -ENOSPC;
387
388 down_write(&EXT4_I(inode)->xattr_sem);
389
390 if (ei->i_inline_off)
391 ret = ext4_update_inline_data(handle, inode, len);
392 else
393 ret = ext4_create_inline_data(handle, inode, len);
394
395 up_write(&EXT4_I(inode)->xattr_sem);
396
397 return ret;
398}
399
/*
 * Drop the inline data: remove the "system.data" xattr entry, zero the
 * i_block area and restore the extents flag where applicable.
 * -ENODATA from the lookup is treated as success.  Caller holds
 * xattr_sem for writing.
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = 0, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	int error;

	if (!ei->i_inline_off)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	error = ext4_journal_get_write_access(handle, is.iloc.bh);
	if (error)
		goto out;

	/* Setting a NULL value removes the entry. */
	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
	if (error)
		goto out;

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
				      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
		if (S_ISDIR(inode->i_mode) ||
		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
			/* Re-initialize an empty extent tree in i_block. */
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

	EXT4_I(inode)->i_inline_off = 0;
	EXT4_I(inode)->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
	brelse(is.iloc.bh);
	if (error == -ENODATA)
		error = 0;
	return error;
}
459
/*
 * Fill @page (which must be the first page of the file) from the
 * inode's inline data and zero the tail of the page.  Returns the
 * number of bytes copied, or a negative error.  Caller holds xattr_sem.
 */
static int ext4_read_inline_page(struct inode *inode, struct page *page)
{
	void *kaddr;
	int ret = 0;
	size_t len;
	struct ext4_iloc iloc;

	BUG_ON(!PageLocked(page));
	BUG_ON(!ext4_has_inline_data(inode));
	BUG_ON(page->index);

	if (!EXT4_I(inode)->i_inline_off) {
		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
			     inode->i_ino);
		goto out;
	}

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		goto out;

	/* Never expose bytes beyond i_size, even when the in-inode
	 * reservation is larger. */
	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
	kaddr = kmap_atomic(page);
	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
	flush_dcache_page(page);
	kunmap_atomic(kaddr);
	zero_user_segment(page, len, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	brelse(iloc.bh);

out:
	return ret;
}
493
/*
 * ->readpage helper for inline files.  Returns -EAGAIN when the inode
 * no longer has inline data so the caller can fall back to the normal
 * block-mapped path.  Unlocks the page in all other cases.
 */
int ext4_readpage_inline(struct inode *inode, struct page *page)
{
	int ret = 0;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		up_read(&EXT4_I(inode)->xattr_sem);
		return -EAGAIN;
	}

	/*
	 * Current inline data can only exist in the 1st page,
	 * So for all the other pages, just set them uptodate.
	 */
	if (!page->index)
		ret = ext4_read_inline_page(inode, page);
	else if (!PageUptodate(page)) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	}

	up_read(&EXT4_I(inode)->xattr_sem);

	unlock_page(page);
	return ret >= 0 ? 0 : ret;
}
520
521static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
522 struct inode *inode,
523 unsigned flags)
524{
525 int ret, needed_blocks;
526 handle_t *handle = NULL;
527 int retries = 0, sem_held = 0;
528 struct page *page = NULL;
529 unsigned from, to;
530 struct ext4_iloc iloc;
531
532 if (!ext4_has_inline_data(inode)) {
533 /*
534 * clear the flag so that no new write
535 * will trap here again.
536 */
537 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
538 return 0;
539 }
540
541 needed_blocks = ext4_writepage_trans_blocks(inode);
542
543 ret = ext4_get_inode_loc(inode, &iloc);
544 if (ret)
545 return ret;
546
547retry:
548 handle = ext4_journal_start(inode, needed_blocks);
549 if (IS_ERR(handle)) {
550 ret = PTR_ERR(handle);
551 handle = NULL;
552 goto out;
553 }
554
555 /* We cannot recurse into the filesystem as the transaction is already
556 * started */
557 flags |= AOP_FLAG_NOFS;
558
559 page = grab_cache_page_write_begin(mapping, 0, flags);
560 if (!page) {
561 ret = -ENOMEM;
562 goto out;
563 }
564
565 down_write(&EXT4_I(inode)->xattr_sem);
566 sem_held = 1;
567 /* If some one has already done this for us, just exit. */
568 if (!ext4_has_inline_data(inode)) {
569 ret = 0;
570 goto out;
571 }
572
573 from = 0;
574 to = ext4_get_inline_size(inode);
575 if (!PageUptodate(page)) {
576 ret = ext4_read_inline_page(inode, page);
577 if (ret < 0)
578 goto out;
579 }
580
581 ret = ext4_destroy_inline_data_nolock(handle, inode);
582 if (ret)
583 goto out;
584
585 if (ext4_should_dioread_nolock(inode))
586 ret = __block_write_begin(page, from, to, ext4_get_block_write);
587 else
588 ret = __block_write_begin(page, from, to, ext4_get_block);
589
590 if (!ret && ext4_should_journal_data(inode)) {
591 ret = ext4_walk_page_buffers(handle, page_buffers(page),
592 from, to, NULL,
593 do_journal_get_write_access);
594 }
595
596 if (ret) {
597 unlock_page(page);
598 page_cache_release(page);
599 ext4_orphan_add(handle, inode);
600 up_write(&EXT4_I(inode)->xattr_sem);
601 sem_held = 0;
602 ext4_journal_stop(handle);
603 handle = NULL;
604 ext4_truncate_failed_write(inode);
605 /*
606 * If truncate failed early the inode might
607 * still be on the orphan list; we need to
608 * make sure the inode is removed from the
609 * orphan list in that case.
610 */
611 if (inode->i_nlink)
612 ext4_orphan_del(NULL, inode);
613 }
614
615 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
616 goto retry;
617
618 block_commit_write(page, from, to);
619out:
620 if (page) {
621 unlock_page(page);
622 page_cache_release(page);
623 }
624 if (sem_held)
625 up_write(&EXT4_I(inode)->xattr_sem);
626 if (handle)
627 ext4_journal_stop(handle);
628 brelse(iloc.bh);
629 return ret;
630}
631
/*
 * Try to write data in the inode.
 * If the inode has inline data, check whether the new write can be
 * in the inode also.  If not, convert the inline data to an extent:
 * create the page, move the data into it, mark it uptodate and let the
 * later code create an extent for it.
 * Returns 1 with *pagep set (and the journal handle still running)
 * when the write will be served inline; 0 or a negative error when the
 * caller must use the normal path.
 */
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  unsigned flags,
				  struct page **pagep)
{
	int ret;
	handle_t *handle;
	struct page *page;
	struct ext4_iloc iloc;

	if (pos + len > ext4_get_max_inline_size(inode))
		goto convert;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	/*
	 * The possible write could happen in the inode,
	 * so try to reserve the space in inode first.
	 */
	handle = ext4_journal_start(inode, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	ret = ext4_prepare_inline_data(handle, inode, pos + len);
	if (ret && ret != -ENOSPC)
		goto out;

	/* We don't have space in inline inode, so convert it to extent. */
	if (ret == -ENOSPC) {
		ext4_journal_stop(handle);
		brelse(iloc.bh);
		goto convert;
	}

	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	*pagep = page;
	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		/* Someone converted it under us: use the normal path. */
		ret = 0;
		unlock_page(page);
		page_cache_release(page);
		goto out_up_read;
	}

	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out_up_read;
	}

	/* Success: hand the still-running handle over to write_end. */
	ret = 1;
	handle = NULL;
out_up_read:
	up_read(&EXT4_I(inode)->xattr_sem);
out:
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
convert:
	return ext4_convert_inline_data_to_extent(mapping,
						  inode, flags);
}
714
/*
 * ->write_end counterpart for an inline write: copy the page contents
 * back into the inode.  Returns the number of bytes accepted (0 on
 * failure so the caller treats it as a short write).
 */
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
			       unsigned copied, struct page *page)
{
	int ret;
	void *kaddr;
	struct ext4_iloc iloc;

	if (unlikely(copied < len)) {
		/* A short copy into a not-uptodate page is unusable. */
		if (!PageUptodate(page)) {
			copied = 0;
			goto out;
		}
	}

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret) {
		ext4_std_error(inode->i_sb, ret);
		copied = 0;
		goto out;
	}

	down_write(&EXT4_I(inode)->xattr_sem);
	BUG_ON(!ext4_has_inline_data(inode));

	kaddr = kmap_atomic(page);
	ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
	kunmap_atomic(kaddr);
	SetPageUptodate(page);
	/* clear page dirty so that writepages wouldn't work for us. */
	ClearPageDirty(page);

	up_write(&EXT4_I(inode)->xattr_sem);
	brelse(iloc.bh);
out:
	return copied;
}
751
/*
 * Copy @len bytes of the (data=journal) page into the inline area and
 * return the inode buffer head for the caller to journal; NULL when
 * the inode location cannot be read.  The caller owns the returned
 * reference and must brelse() it.
 */
struct buffer_head *
ext4_journalled_write_inline_data(struct inode *inode,
				  unsigned len,
				  struct page *page)
{
	int ret;
	void *kaddr;
	struct ext4_iloc iloc;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret) {
		ext4_std_error(inode->i_sb, ret);
		return NULL;
	}

	down_write(&EXT4_I(inode)->xattr_sem);
	kaddr = kmap_atomic(page);
	ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
	kunmap_atomic(kaddr);
	up_write(&EXT4_I(inode)->xattr_sem);

	return iloc.bh;
}
775
/*
 * Try to make the page cache and handle ready for the inline data case.
 * We can call this function in 2 cases:
 * 1. The inode is created and the first write exceeds inline size.  We can
 *    clear the inode state safely.
 * 2. The inode has inline data, then we need to read the data, make it
 *    uptodate and dirty so that ext4_da_writepages can handle it.  We don't
 *    need to start the journal since the file's metadata isn't changed now.
 */
static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
						 struct inode *inode,
						 unsigned flags,
						 void **fsdata)
{
	int ret = 0, inline_size;
	struct page *page;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page)
		return -ENOMEM;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		/* Case 1: nothing to convert; stop trapping writes here. */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	inline_size = ext4_get_inline_size(inode);

	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out;
	}

	/* Reserve delalloc blocks for the former inline range. */
	ret = __block_write_begin(page, 0, inline_size,
				  ext4_da_get_block_prep);
	if (ret) {
		ext4_truncate_failed_write(inode);
		goto out;
	}

	SetPageDirty(page);
	SetPageUptodate(page);
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	/* Tell ext4_da_write_end() the conversion already happened. */
	*fsdata = (void *)CONVERT_INLINE_DATA;

out:
	up_read(&EXT4_I(inode)->xattr_sem);
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	return ret;
}
831
/*
 * Prepare the write for the inline data.
 * If the data can be written into the inode, we just read
 * the page and make it uptodate, and start the journal.
 * Otherwise read the page, make it dirty so that it can be
 * handled in writepages (the i_disksize update is left to the
 * normal ext4_da_write_end).
 * Returns 1 with *pagep set when the delalloc write stays inline.
 */
int ext4_da_write_inline_data_begin(struct address_space *mapping,
				    struct inode *inode,
				    loff_t pos, unsigned len,
				    unsigned flags,
				    struct page **pagep,
				    void **fsdata)
{
	int ret, inline_size;
	handle_t *handle;
	struct page *page;
	struct ext4_iloc iloc;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	handle = ext4_journal_start(inode, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	inline_size = ext4_get_max_inline_size(inode);

	ret = -ENOSPC;
	if (inline_size >= pos + len) {
		ret = ext4_prepare_inline_data(handle, inode, pos + len);
		if (ret && ret != -ENOSPC)
			goto out;
	}

	if (ret == -ENOSPC) {
		/* Doesn't fit inline: fall back to a real block. */
		ret = ext4_da_convert_inline_data_to_extent(mapping,
							    inode,
							    flags,
							    fsdata);
		goto out;
	}

	/*
	 * We cannot recurse into the filesystem as the transaction
	 * is already started.
	 */
	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		/* Someone converted it under us: use the normal path. */
		ret = 0;
		goto out_release_page;
	}

	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out_release_page;
	}

	up_read(&EXT4_I(inode)->xattr_sem);
	*pagep = page;
	/* Keep the handle running for write_end. */
	handle = NULL;
	brelse(iloc.bh);
	return 1;
out_release_page:
	up_read(&EXT4_I(inode)->xattr_sem);
	unlock_page(page);
	page_cache_release(page);
out:
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}
919
/*
 * ->write_end for a delalloc inline write: push the page data back into
 * the inode, update i_size and mark the inode dirty if it grew.
 * Returns the number of bytes accepted.
 */
int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
				  unsigned len, unsigned copied,
				  struct page *page)
{
	int i_size_changed = 0;

	copied = ext4_write_inline_data_end(inode, pos, len, copied, page);

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		i_size_changed = 1;
	}
	unlock_page(page);
	page_cache_release(page);

	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		mark_inode_dirty(inode);

	return copied;
}
953
#ifdef INLINE_DIR_DEBUG
/*
 * Debug-only (INLINE_DIR_DEBUG): dump every dirent in an inline
 * directory chunk via trace_printk(); BUGs on a corrupt entry.
 */
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
			  void *inline_start, int inline_size)
{
	int offset;
	unsigned short de_len;
	struct ext4_dir_entry_2 *de = inline_start;
	void *dlimit = inline_start + inline_size;

	trace_printk("inode %lu\n", dir->i_ino);
	offset = 0;
	while ((void *)de < dlimit) {
		de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
		/*
		 * Bug fix: the format used "%*.s" (field width plus an
		 * empty precision), which prints nothing; "%.*s" is needed
		 * to print the first name_len bytes of the unterminated
		 * dirent name.
		 */
		trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
			     offset, de_len, de->name_len, de->name,
			     de->name_len, le32_to_cpu(de->inode));
		if (ext4_check_dir_entry(dir, NULL, de, bh,
					 inline_start, inline_size, offset))
			BUG();

		offset += de_len;
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
	}
}
#else
#define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
#endif
981
/*
 * Add a new entry into an inline dir.
 * It will return -ENOSPC if no space is available, and -EIO
 * and -EEXIST if directory entry already exists.
 */
static int ext4_add_dirent_to_inline(handle_t *handle,
				     struct dentry *dentry,
				     struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *inline_start, int inline_size)
{
	struct inode *dir = dentry->d_parent->d_inode;
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned short reclen;
	int err;
	struct ext4_dir_entry_2 *de;

	/* NOTE(review): reclen is computed but never used below;
	 * ext4_find_dest_de() does its own fit check. */
	reclen = EXT4_DIR_REC_LEN(namelen);
	err = ext4_find_dest_de(dir, inode, iloc->bh,
				inline_start, inline_size,
				name, namelen, &de);
	if (err)
		return err;

	err = ext4_journal_get_write_access(handle, iloc->bh);
	if (err)
		return err;
	ext4_insert_dentry(inode, de, inline_size, name, namelen);

	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);

	/*
	 * XXX shouldn't update any times until successful
	 * completion of syscall, but too many callers depend
	 * on this.
	 *
	 * XXX similarly, too many callers depend on
	 * ext4_new_inode() setting the times, but error
	 * recovery deletes the inode, so the worst that can
	 * happen is that the times are slightly out of date
	 * and/or different from the directory change time.
	 */
	dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
	ext4_update_dx_flag(dir);
	dir->i_version++;
	ext4_mark_inode_dirty(handle, dir);
	return 1;
}
1031
1032static void *ext4_get_inline_xattr_pos(struct inode *inode,
1033 struct ext4_iloc *iloc)
1034{
1035 struct ext4_xattr_entry *entry;
1036 struct ext4_xattr_ibody_header *header;
1037
1038 BUG_ON(!EXT4_I(inode)->i_inline_off);
1039
1040 header = IHDR(inode, ext4_raw_inode(iloc));
1041 entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) +
1042 EXT4_I(inode)->i_inline_off);
1043
1044 return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs);
1045}
1046
/* Set the final de to cover the whole block. */
static void ext4_update_final_de(void *de_buf, int old_size, int new_size)
{
	struct ext4_dir_entry_2 *de, *prev_de;
	void *limit;
	int de_len;

	de = (struct ext4_dir_entry_2 *)de_buf;
	if (old_size) {
		limit = de_buf + old_size;
		/* Walk to the last entry within the old size. */
		do {
			prev_de = de;
			de_len = ext4_rec_len_from_disk(de->rec_len, old_size);
			de_buf += de_len;
			de = (struct ext4_dir_entry_2 *)de_buf;
		} while (de_buf < limit);

		/* Stretch the last entry to absorb the grown tail. */
		prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size -
							old_size, new_size);
	} else {
		/* this is just created, so create an empty entry. */
		de->inode = 0;
		de->rec_len = ext4_rec_len_to_disk(new_size, new_size);
	}
}
1072
/*
 * Grow an inline directory into the in-inode xattr space and fix up
 * the final dirent to cover the enlarged area.
 */
static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
				  struct ext4_iloc *iloc)
{
	int ret;
	int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
	int new_size = get_max_inline_xattr_value_size(dir, iloc);

	/* Growing must buy room for at least one minimal dirent. */
	if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
		return -ENOSPC;

	ret = ext4_update_inline_data(handle, dir,
				      new_size + EXT4_MIN_INLINE_DATA_SIZE);
	if (ret)
		return ret;

	ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size,
			     EXT4_I(dir)->i_inline_size -
						EXT4_MIN_INLINE_DATA_SIZE);
	dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size;
	return 0;
}
1094
/*
 * Put @buf back as inline data after a failed conversion attempt.
 * NOTE(review): the return value of ext4_create_inline_data() is
 * ignored; on this error-recovery path there is little more that could
 * be done, but confirm this is intentional.
 */
static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *buf, int inline_size)
{
	ext4_create_inline_data(handle, inode, inline_size);
	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
1103
/*
 * Lay out a freshly allocated directory block from the inline dir image
 * in @buf: create "." and "..", copy the remaining entries, stretch the
 * last entry, and append a checksum tail when metadata_csum is enabled.
 */
static int ext4_finish_convert_inline_dir(handle_t *handle,
					  struct inode *inode,
					  struct buffer_head *dir_block,
					  void *buf,
					  int inline_size)
{
	int err, csum_size = 0, header_size = 0;
	struct ext4_dir_entry_2 *de;
	struct ext4_dir_entry_tail *t;
	void *target = dir_block->b_data;

	/*
	 * First create "." and ".." and then copy the dir information
	 * back to the block.
	 */
	/* NOTE(review): csum_size is still 0 here; it is only computed
	 * below, after ext4_init_dot_dotdot() has run.  Looks benign
	 * because ext4_update_final_de() re-sizes the last entry with the
	 * correct csum_size, but worth confirming. */
	de = (struct ext4_dir_entry_2 *)target;
	de = ext4_init_dot_dotdot(inode, de,
		inode->i_sb->s_blocksize, csum_size,
		le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1);
	header_size = (void *)de - target;

	/* Inline dirs store ".." compressed in 4 bytes; skip it here. */
	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
		inline_size - EXT4_INLINE_DOTDOT_SIZE);

	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	inode->i_size = inode->i_sb->s_blocksize;
	i_size_write(inode, inode->i_sb->s_blocksize);
	EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
	ext4_update_final_de(dir_block->b_data,
			inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
			inode->i_sb->s_blocksize - csum_size);

	if (csum_size) {
		t = EXT4_DIRENT_TAIL(dir_block->b_data,
				     inode->i_sb->s_blocksize);
		initialize_dirent_tail(t, inode->i_sb->s_blocksize);
	}
	set_buffer_uptodate(dir_block);
	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
	if (err)
		goto out;
	set_buffer_verified(dir_block);
out:
	return err;
}
1152
/*
 * Move the inline contents out to block 0 of the file or directory.
 * On any failure after the inline data has been destroyed, the saved
 * copy in @buf is written back via ext4_restore_inline_data().
 * Caller holds xattr_sem for writing.
 */
static int ext4_convert_inline_data_nolock(handle_t *handle,
					   struct inode *inode,
					   struct ext4_iloc *iloc)
{
	int error;
	void *buf = NULL;
	struct buffer_head *data_bh = NULL;
	struct ext4_map_blocks map;
	int inline_size;

	inline_size = ext4_get_inline_size(inode);
	buf = kmalloc(inline_size, GFP_NOFS);
	if (!buf) {
		error = -ENOMEM;
		goto out;
	}

	/* Snapshot the inline bytes before destroying them. */
	error = ext4_read_inline_data(inode, buf, inline_size, iloc);
	if (error < 0)
		goto out;

	error = ext4_destroy_inline_data_nolock(handle, inode);
	if (error)
		goto out;

	/* Allocate the file's first block. */
	map.m_lblk = 0;
	map.m_len = 1;
	map.m_flags = 0;
	error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
	if (error < 0)
		goto out_restore;
	if (!(map.m_flags & EXT4_MAP_MAPPED)) {
		error = -EIO;
		goto out_restore;
	}

	data_bh = sb_getblk(inode->i_sb, map.m_pblk);
	if (!data_bh) {
		/* NOTE(review): later kernels report sb_getblk() failure
		 * as -ENOMEM rather than -EIO. */
		error = -EIO;
		goto out_restore;
	}

	lock_buffer(data_bh);
	error = ext4_journal_get_create_access(handle, data_bh);
	if (error) {
		unlock_buffer(data_bh);
		/* NOTE(review): this clobbers the journal's error code
		 * with -EIO — confirm that is intended. */
		error = -EIO;
		goto out_restore;
	}
	memset(data_bh->b_data, 0, inode->i_sb->s_blocksize);

	if (!S_ISDIR(inode->i_mode)) {
		/* Regular file / symlink: plain byte copy. */
		memcpy(data_bh->b_data, buf, inline_size);
		set_buffer_uptodate(data_bh);
		error = ext4_handle_dirty_metadata(handle,
						   inode, data_bh);
	} else {
		/* Directory: rebuild ".", ".." and the entry list. */
		error = ext4_finish_convert_inline_dir(handle, inode, data_bh,
						       buf, inline_size);
	}

	unlock_buffer(data_bh);
out_restore:
	if (error)
		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);

out:
	brelse(data_bh);
	kfree(buf);
	return error;
}
1224
1225/*
1226 * Try to add the new directory entry to the parent's inline data.
1227 * Returns 0 on success.  If neither the i_block area nor the in-inode
1228 * xattr space has room, the inline dir is converted to a block-based one.
1229 */
1230int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
1231			      struct inode *inode)
1232{
1233	int ret, inline_size;
1234	void *inline_start;
1235	struct ext4_iloc iloc;
1236	struct inode *dir = dentry->d_parent->d_inode;
1237
1238	ret = ext4_get_inode_loc(dir, &iloc);
1239	if (ret)
1240		return ret;
1241
1242	down_write(&EXT4_I(dir)->xattr_sem);
1243	if (!ext4_has_inline_data(dir))
1244		goto out;
1245
	/* First try the i_block area following the fake ".." entry. */
1246	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
1247						 EXT4_INLINE_DOTDOT_SIZE;
1248	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
1249
1250	ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
1251					inline_start, inline_size);
1252	if (ret != -ENOSPC)
1253		goto out;
1254
1255	/* check whether it can be inserted to inline xattr space. */
1256	inline_size =	EXT4_I(dir)->i_inline_size -
1257			EXT4_MIN_INLINE_DATA_SIZE;
1258	if (!inline_size) {
1259		/* Try to use the xattr space.*/
1260		ret = ext4_update_inline_dir(handle, dir, &iloc);
1261		if (ret && ret != -ENOSPC)
1262			goto out;
1263
		/* Re-read: ext4_update_inline_dir() may have grown the area. */
1264		inline_size = EXT4_I(dir)->i_inline_size -
1265				EXT4_MIN_INLINE_DATA_SIZE;
1266	}
1267
1268	if (inline_size) {
1269		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1270
1271		ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
1272						inline_start, inline_size);
1273
1274		if (ret != -ENOSPC)
1275			goto out;
1276	}
1277
1278	/*
1279	 * The inline space is filled up, so create a new block for it.
1280	 * As the extent tree will be created, we have to save the inline
1281	 * dir first.
1282	 */
1283	ret = ext4_convert_inline_data_nolock(handle, dir, &iloc);
1284
1285out:
1286	ext4_mark_inode_dirty(handle, dir);
1287	up_write(&EXT4_I(dir)->xattr_sem);
1288	brelse(iloc.bh);
1289	return ret;
1290}
1291
/*
 * ext4_read_inline_dir - readdir() implementation for inline directories.
 *
 * Copies the inline dir into a private buffer (under xattr_sem) and
 * then walks it, emitting "." and ".." from the fake dotdot header and
 * regular entries after it.  If the inode version changes under us
 * (concurrent modification), the offset is revalidated by rescanning
 * from the start.  Sets *has_inline_data = 0 and returns if the inode
 * no longer holds inline data.
 */
1292int ext4_read_inline_dir(struct file *filp,
1293			 void *dirent, filldir_t filldir,
1294			 int *has_inline_data)
1295{
1296	int error = 0;
1297	unsigned int offset, parent_ino;
1298	int i, stored;
1299	struct ext4_dir_entry_2 *de;
1300	struct super_block *sb;
1301	struct inode *inode = filp->f_path.dentry->d_inode;
1302	int ret, inline_size = 0;
1303	struct ext4_iloc iloc;
1304	void *dir_buf = NULL;
1305
1306	ret = ext4_get_inode_loc(inode, &iloc);
1307	if (ret)
1308		return ret;
1309
1310	down_read(&EXT4_I(inode)->xattr_sem);
1311	if (!ext4_has_inline_data(inode)) {
1312		up_read(&EXT4_I(inode)->xattr_sem);
1313		*has_inline_data = 0;
1314		goto out;
1315	}
1316
1317	inline_size = ext4_get_inline_size(inode);
1318	dir_buf = kmalloc(inline_size, GFP_NOFS);
1319	if (!dir_buf) {
1320		ret = -ENOMEM;
1321		up_read(&EXT4_I(inode)->xattr_sem);
1322		goto out;
1323	}
1324
	/* Work on a private copy; the lock is dropped once it is taken. */
1325	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
1326	up_read(&EXT4_I(inode)->xattr_sem);
1327	if (ret < 0)
1328		goto out;
1329
1330	sb = inode->i_sb;
1331	stored = 0;
	/* The parent inode number lives in the fake ".." header entry. */
1332	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
1333
1334	while (!error && !stored && filp->f_pos < inode->i_size) {
1335revalidate:
1336		/*
1337		 * If the version has changed since the last call to
1338		 * readdir(2), then we might be pointing to an invalid
1339		 * dirent right now. Scan from the start of the inline
1340		 * dir to make sure.
1341		 */
		/*
		 * NOTE(review): if this branch is taken on the very first
		 * loop iteration, 'offset' is read before any assignment —
		 * confirm f_version always matches i_version on entry.
		 */
1342		if (filp->f_version != inode->i_version) {
1343			for (i = 0;
1344			     i < inode->i_size && i < offset;) {
1345				if (!i) {
1346					/* skip "." and ".." if needed. */
1347					i += EXT4_INLINE_DOTDOT_SIZE;
1348					continue;
1349				}
1350				de = (struct ext4_dir_entry_2 *)
1351					(dir_buf + i);
1352				/* It's too expensive to do a full
1353				 * dirent test each time round this
1354				 * loop, but we do have to test at
1355				 * least that it is non-zero.  A
1356				 * failure will be detected in the
1357				 * dirent test below. */
1358				if (ext4_rec_len_from_disk(de->rec_len,
1359					inline_size) < EXT4_DIR_REC_LEN(1))
1360					break;
1361				i += ext4_rec_len_from_disk(de->rec_len,
1362							    inline_size);
1363			}
1364			offset = i;
1365			filp->f_pos = offset;
1366			filp->f_version = inode->i_version;
1367		}
1368
1369		while (!error && filp->f_pos < inode->i_size) {
			/* f_pos 0 means "." and ".." have not been emitted yet. */
1370			if (filp->f_pos == 0) {
1371				error = filldir(dirent, ".", 1, 0, inode->i_ino,
1372						DT_DIR);
1373				if (error)
1374					break;
1375				stored++;
1376
1377				error = filldir(dirent, "..", 2, 0, parent_ino,
1378						DT_DIR);
1379				if (error)
1380					break;
1381				stored++;
1382
1383				filp->f_pos = offset = EXT4_INLINE_DOTDOT_SIZE;
1384				continue;
1385			}
1386
1387			de = (struct ext4_dir_entry_2 *)(dir_buf + offset);
1388			if (ext4_check_dir_entry(inode, filp, de,
1389						 iloc.bh, dir_buf,
1390						 inline_size, offset)) {
1391				ret = stored;
1392				goto out;
1393			}
1394			offset += ext4_rec_len_from_disk(de->rec_len,
1395							 inline_size);
			/* inode == 0 marks a deleted/placeholder entry: skip. */
1396			if (le32_to_cpu(de->inode)) {
1397				/* We might block in the next section
1398				 * if the data destination is
1399				 * currently swapped out.  So, use a
1400				 * version stamp to detect whether or
1401				 * not the directory has been modified
1402				 * during the copy operation.
1403				 */
1404				u64 version = filp->f_version;
1405
1406				error = filldir(dirent, de->name,
1407						de->name_len,
1408						filp->f_pos,
1409						le32_to_cpu(de->inode),
1410						get_dtype(sb, de->file_type));
1411				if (error)
1412					break;
1413				if (version != filp->f_version)
1414					goto revalidate;
1415				stored++;
1416			}
1417			filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
1418							      inline_size);
1419		}
1420		offset = 0;
1421	}
1422out:
1423	kfree(dir_buf);
1424	brelse(iloc.bh);
1425	return ret;
1426}
1427
/*
 * ext4_get_first_inline_block - return the inode buffer and point
 * *parent_de at the first (fake "..") dirent inside i_block.
 *
 * On success the inode-table buffer_head is returned with an elevated
 * reference; the caller must brelse() it.  On failure *retval holds the
 * error and NULL is returned.
 */
1428struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
1429					struct ext4_dir_entry_2 **parent_de,
1430					int *retval)
1431{
1432	struct ext4_iloc iloc;
1433
1434	*retval = ext4_get_inode_loc(inode, &iloc);
1435	if (*retval)
1436		return NULL;
1437
1438	*parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
1439
1440	return iloc.bh;
1441}
1442
1443/*
1444 * Try to create the inline data for the new dir.
1445 * If it succeeds, return 0, otherwise return the error.
1446 * In case of ENOSPC, the caller should create the normal disk layout dir.
1447 */
1448int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
1449			       struct inode *inode)
1450{
1451	int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE;
1452	struct ext4_iloc iloc;
1453	struct ext4_dir_entry_2 *de;
1454
1455	ret = ext4_get_inode_loc(inode, &iloc);
1456	if (ret)
1457		return ret;
1458
1459	ret = ext4_prepare_inline_data(handle, inode, inline_size);
1460	if (ret)
1461		goto out;
1462
1463	/*
1464	 * For inline dir, we only save the inode information for the ".."
1465	 * and create a fake dentry to cover the left space.
1466	 */
1467	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
1468	de->inode = cpu_to_le32(parent->i_ino);
	/* Fake entry (inode 0) spanning the rest of the inline area. */
1469	de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE);
1470	de->inode = 0;
1471	de->rec_len = ext4_rec_len_to_disk(
1472				inline_size - EXT4_INLINE_DOTDOT_SIZE,
1473				inline_size);
	/* New directory: link count 2 ("." and the parent's entry). */
1474	set_nlink(inode, 2);
1475	inode->i_size = EXT4_I(inode)->i_disksize = inline_size;
1476out:
1477	brelse(iloc.bh);
1478	return ret;
1479}
1480
/*
 * ext4_find_inline_entry - look up d_name in an inline directory.
 *
 * Searches first the i_block area (after the fake ".."), then the
 * in-inode xattr region if one exists.  On a hit, returns the inode
 * buffer (caller must brelse()) with *res_dir set — note the bh is
 * returned while still holding xattr_sem via the out_find path after
 * up_read().  Returns NULL on miss or error; sets *has_inline_data = 0
 * if the dir no longer has inline data.
 */
1481struct buffer_head *ext4_find_inline_entry(struct inode *dir,
1482					const struct qstr *d_name,
1483					struct ext4_dir_entry_2 **res_dir,
1484					int *has_inline_data)
1485{
1486	int ret;
1487	struct ext4_iloc iloc;
1488	void *inline_start;
1489	int inline_size;
1490
1491	if (ext4_get_inode_loc(dir, &iloc))
1492		return NULL;
1493
1494	down_read(&EXT4_I(dir)->xattr_sem);
1495	if (!ext4_has_inline_data(dir)) {
1496		*has_inline_data = 0;
1497		goto out;
1498	}
1499
	/* Region 1: the i_block area past the fake ".." entry. */
1500	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
1501						EXT4_INLINE_DOTDOT_SIZE;
1502	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
1503	ret = search_dir(iloc.bh, inline_start, inline_size,
1504			 dir, d_name, 0, res_dir);
1505	if (ret == 1)
1506		goto out_find;
1507	if (ret < 0)
1508		goto out;
1509
1510	if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
1511		goto out;
1512
	/* Region 2: the overflow area stored in the in-inode xattr space. */
1513	inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1514	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
1515
1516	ret = search_dir(iloc.bh, inline_start, inline_size,
1517			 dir, d_name, 0, res_dir);
1518	if (ret == 1)
1519		goto out_find;
1520
1521out:
1522	brelse(iloc.bh);
1523	iloc.bh = NULL;
1524out_find:
1525	up_read(&EXT4_I(dir)->xattr_sem);
1526	return iloc.bh;
1527}
1528
/*
 * ext4_delete_inline_entry - remove a dirent from an inline directory.
 *
 * Determines which inline region (i_block or in-inode xattr space)
 * de_del lives in from its byte offset within the raw inode, then
 * delegates to ext4_generic_delete_entry().  Sets *has_inline_data = 0
 * and returns if the dir no longer has inline data.
 */
1529int ext4_delete_inline_entry(handle_t *handle,
1530			     struct inode *dir,
1531			     struct ext4_dir_entry_2 *de_del,
1532			     struct buffer_head *bh,
1533			     int *has_inline_data)
1534{
1535	int err, inline_size;
1536	struct ext4_iloc iloc;
1537	void *inline_start;
1538
1539	err = ext4_get_inode_loc(dir, &iloc);
1540	if (err)
1541		return err;
1542
1543	down_write(&EXT4_I(dir)->xattr_sem);
1544	if (!ext4_has_inline_data(dir)) {
1545		*has_inline_data = 0;
1546		goto out;
1547	}
1548
	/* Pick the region containing de_del by its offset from i_block. */
1549	if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) <
1550		EXT4_MIN_INLINE_DATA_SIZE) {
1551		inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
1552					EXT4_INLINE_DOTDOT_SIZE;
1553		inline_size = EXT4_MIN_INLINE_DATA_SIZE -
1554				EXT4_INLINE_DOTDOT_SIZE;
1555	} else {
1556		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1557		inline_size = ext4_get_inline_size(dir) -
1558				EXT4_MIN_INLINE_DATA_SIZE;
1559	}
1560
1561	err = ext4_journal_get_write_access(handle, bh);
1562	if (err)
1563		goto out;
1564
1565	err = ext4_generic_delete_entry(handle, dir, de_del, bh,
1566					inline_start, inline_size, 0);
1567	if (err)
1568		goto out;
1569
1570	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1571	err = ext4_mark_inode_dirty(handle, dir);
1572	if (unlikely(err))
1573		goto out;
1574
1575	ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
1576out:
1577	up_write(&EXT4_I(dir)->xattr_sem);
1578	brelse(iloc.bh);
	/* -ENOENT is expected (entry already gone); anything else is logged. */
1579	if (err != -ENOENT)
1580		ext4_std_error(dir->i_sb, err);
1581	return err;
1582}
1583
1584/*
1585 * Get the inline dentry at offset.
1586 */
/*
 * Translates a logical offset within the inline dir into a pointer.
 * Offsets below EXT4_MIN_INLINE_DATA_SIZE fall in i_block; larger
 * offsets are rebased into the in-inode xattr region.  *inline_size is
 * set to the size of the region the entry lives in; *inline_start
 * (optional) receives the region's base pointer.
 */
1587static inline struct ext4_dir_entry_2 *
1588ext4_get_inline_entry(struct inode *inode,
1589		      struct ext4_iloc *iloc,
1590		      unsigned int offset,
1591		      void **inline_start,
1592		      int *inline_size)
1593{
1594	void *inline_pos;
1595
1596	BUG_ON(offset > ext4_get_inline_size(inode));
1597
1598	if (offset < EXT4_MIN_INLINE_DATA_SIZE) {
1599		inline_pos = (void *)ext4_raw_inode(iloc)->i_block;
1600		*inline_size = EXT4_MIN_INLINE_DATA_SIZE;
1601	} else {
1602		inline_pos = ext4_get_inline_xattr_pos(inode, iloc);
1603		offset -= EXT4_MIN_INLINE_DATA_SIZE;
1604		*inline_size = ext4_get_inline_size(inode) -
1605				EXT4_MIN_INLINE_DATA_SIZE;
1606	}
1607
1608	if (inline_start)
1609		*inline_start = inline_pos;
1610	return (struct ext4_dir_entry_2 *)(inline_pos + offset);
1611}
1612
/*
 * empty_inline_dir - return 1 if the inline directory holds no live
 * entries (beyond the fake ".."), 0 if it has at least one, and 1 on
 * any error or corruption (so a damaged dir is treated as empty by the
 * caller).  Sets *has_inline_data = 0 if the dir is not inline.
 */
1613int empty_inline_dir(struct inode *dir, int *has_inline_data)
1614{
1615	int err, inline_size;
1616	struct ext4_iloc iloc;
1617	void *inline_pos;
1618	unsigned int offset;
1619	struct ext4_dir_entry_2 *de;
1620	int ret = 1;
1621
1622	err = ext4_get_inode_loc(dir, &iloc);
1623	if (err) {
1624		EXT4_ERROR_INODE(dir, "error %d getting inode %lu block",
1625				 err, dir->i_ino);
1626		return 1;
1627	}
1628
1629	down_read(&EXT4_I(dir)->xattr_sem);
1630	if (!ext4_has_inline_data(dir)) {
1631		*has_inline_data = 0;
1632		goto out;
1633	}
1634
	/* The first entry must be the fake ".." with a valid inode number. */
1635	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
1636	if (!le32_to_cpu(de->inode)) {
1637		ext4_warning(dir->i_sb,
1638			     "bad inline directory (dir #%lu) - no `..'",
1639			     dir->i_ino);
1640		ret = 1;
1641		goto out;
1642	}
1643
1644	offset = EXT4_INLINE_DOTDOT_SIZE;
1645	while (offset < dir->i_size) {
1646		de = ext4_get_inline_entry(dir, &iloc, offset,
1647					   &inline_pos, &inline_size);
1648		if (ext4_check_dir_entry(dir, NULL, de,
1649					 iloc.bh, inline_pos,
1650					 inline_size, offset)) {
			/*
			 * NOTE(review): the format string below lacks a
			 * separator between "name_len %d" and "inline size".
			 */
1651			ext4_warning(dir->i_sb,
1652				     "bad inline directory (dir #%lu) - "
1653				     "inode %u, rec_len %u, name_len %d"
1654				     "inline size %d\n",
1655				     dir->i_ino, le32_to_cpu(de->inode),
1656				     le16_to_cpu(de->rec_len), de->name_len,
1657				     inline_size);
1658			ret = 1;
1659			goto out;
1660		}
		/* Any entry with a non-zero inode means the dir is not empty. */
1661		if (le32_to_cpu(de->inode)) {
1662			ret = 0;
1663			goto out;
1664		}
1665		offset += ext4_rec_len_from_disk(de->rec_len, inline_size);
1666	}
1667
1668out:
1669	up_read(&EXT4_I(dir)->xattr_sem);
1670	brelse(iloc.bh);
1671	return ret;
1672}
1673
/*
 * ext4_destroy_inline_data - locked wrapper around
 * ext4_destroy_inline_data_nolock(); takes xattr_sem for writing.
 */
1674int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
1675{
1676	int ret;
1677
1678	down_write(&EXT4_I(inode)->xattr_sem);
1679	ret = ext4_destroy_inline_data_nolock(handle, inode);
1680	up_write(&EXT4_I(inode)->xattr_sem);
1681
1682	return ret;
1683}
1684
/*
 * ext4_inline_data_fiemap - report inline data as a single fiemap
 * extent (DATA_INLINE | LAST).  The "physical" address is the byte
 * position of i_block within the on-disk inode table block.  Sets
 * *has_inline = 0 if the inode has no inline data.
 */
1685int ext4_inline_data_fiemap(struct inode *inode,
1686			    struct fiemap_extent_info *fieinfo,
1687			    int *has_inline)
1688{
1689	__u64 physical = 0;
1690	__u64 length;
1691	__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST;
1692	int error = 0;
1693	struct ext4_iloc iloc;
1694
1695	down_read(&EXT4_I(inode)->xattr_sem);
1696	if (!ext4_has_inline_data(inode)) {
1697		*has_inline = 0;
1698		goto out;
1699	}
1700
1701	error = ext4_get_inode_loc(inode, &iloc);
1702	if (error)
1703		goto out;
1704
	/* Byte address of i_block: block base + inode offset + field offset. */
1705	physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
1706	physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
1707	physical += offsetof(struct ext4_inode, i_block);
1708	length = i_size_read(inode);
1709
1710	if (physical)
1711		error = fiemap_fill_next_extent(fieinfo, 0, physical,
1712						length, flags);
1713	brelse(iloc.bh);
1714out:
1715	up_read(&EXT4_I(inode)->xattr_sem);
1716	return (error < 0 ? error : 0);
1717}
1718
1719/*
1720 * Called during xattr set; if we can spare space 'needed' by evicting
1721 * the inline data to an external block, create the extent tree and do so.
1722 *
1723 * We use jbd2 instead of page cache to move data to the 1st block
1724 * so that the whole transaction can be committed as a whole and
1725 * the data isn't lost because of the delayed page cache write.
1726 */
1727int ext4_try_to_evict_inline_data(handle_t *handle,
1728				  struct inode *inode,
1729				  int needed)
1730{
1731	int error;
1732	struct ext4_xattr_entry *entry;
1733	struct ext4_xattr_ibody_header *header;
1734	struct ext4_inode *raw_inode;
1735	struct ext4_iloc iloc;
1736
1737	error = ext4_get_inode_loc(inode, &iloc);
1738	if (error)
1739		return error;
1740
1741	raw_inode = ext4_raw_inode(&iloc);
1742	header = IHDR(inode, raw_inode);
	/* Locate the system.data xattr entry holding the inline payload. */
1743	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
1744					    EXT4_I(inode)->i_inline_off);
	/* Give up if evicting the inline data would not free enough room. */
1745	if (EXT4_XATTR_LEN(entry->e_name_len) +
1746	    EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
1747		error = -ENOSPC;
1748		goto out;
1749	}
1750
1751	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
1752out:
1753	brelse(iloc.bh);
1754	return error;
1755}
1756
/*
 * ext4_inline_data_truncate - truncate an inode whose data is inline.
 *
 * Starts its own transaction, adds the inode to the orphan list for
 * crash safety, shrinks the system.data xattr value (when the new size
 * drops below the current inline size), and zeroes the tail of i_block.
 * Errors in the middle are not propagated (void return) — the function
 * just stops and cleans up; sets *has_inline = 0 if the inode turns
 * out not to hold inline data.
 */
1757void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
1758{
1759	handle_t *handle;
1760	int inline_size, value_len, needed_blocks;
1761	size_t i_size;
1762	void *value = NULL;
1763	struct ext4_xattr_ibody_find is = {
1764		.s = { .not_found = -ENODATA, },
1765	};
1766	struct ext4_xattr_info i = {
1767		.name_index = EXT4_XATTR_INDEX_SYSTEM,
1768		.name = EXT4_XATTR_SYSTEM_DATA,
1769	};
1770
1771
1772	needed_blocks = ext4_writepage_trans_blocks(inode);
1773	handle = ext4_journal_start(inode, needed_blocks);
1774	if (IS_ERR(handle))
1775		return;
1776
1777	down_write(&EXT4_I(inode)->xattr_sem);
1778	if (!ext4_has_inline_data(inode)) {
1779		*has_inline = 0;
1780		ext4_journal_stop(handle);
1781		return;
1782	}
1783
	/* Orphan-list the inode so a crash mid-truncate is recoverable. */
1784	if (ext4_orphan_add(handle, inode))
1785		goto out;
1786
1787	if (ext4_get_inode_loc(inode, &is.iloc))
1788		goto out;
1789
1790	down_write(&EXT4_I(inode)->i_data_sem);
1791	i_size = inode->i_size;
1792	inline_size = ext4_get_inline_size(inode);
1793	EXT4_I(inode)->i_disksize = i_size;
1794
	/* Only shrinking truncates need any on-disk adjustment here. */
1795	if (i_size < inline_size) {
1796		/* Clear the content in the xattr space. */
1797		if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
1798			if (ext4_xattr_ibody_find(inode, &i, &is))
1799				goto out_error;
1799
1800			BUG_ON(is.s.not_found);
1801
1802			value_len = le32_to_cpu(is.s.here->e_value_size);
1803			value = kmalloc(value_len, GFP_NOFS);
1804			if (!value)
1805				goto out_error;
1806
1807			if (ext4_xattr_ibody_get(inode, i.name_index, i.name,
1808						 value, value_len))
1809				goto out_error;
1810
			/* Re-set system.data with only the bytes still in range. */
1811			i.value = value;
1812			i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
1813					i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
1814			if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is))
1815				goto out_error;
1816		}
1817
1818		/* Clear the content within i_blocks. */
1819		if (i_size < EXT4_MIN_INLINE_DATA_SIZE)
1820			memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0,
1821				EXT4_MIN_INLINE_DATA_SIZE - i_size);
1822
1823		EXT4_I(inode)->i_inline_size = i_size <
1824					EXT4_MIN_INLINE_DATA_SIZE ?
1825					EXT4_MIN_INLINE_DATA_SIZE : i_size;
1826	}
1827
1828out_error:
1829	up_write(&EXT4_I(inode)->i_data_sem);
1830out:
1831	brelse(is.iloc.bh);
1832	up_write(&EXT4_I(inode)->xattr_sem);
1833	kfree(value);
	/* Still linked: truncate finished normally, leave the orphan list. */
1834	if (inode->i_nlink)
1835		ext4_orphan_del(handle, inode);
1836
1837	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
1838	ext4_mark_inode_dirty(handle, inode);
1839	if (IS_SYNC(inode))
1840		ext4_handle_sync(handle);
1841
1842	ext4_journal_stop(handle);
1843	return;
1844}
1846
/*
 * ext4_convert_inline_data - public entry point to force an inode's
 * inline data out to a regular block.
 *
 * Starts its own transaction and re-checks ext4_has_inline_data()
 * under xattr_sem, since the data may have been converted by someone
 * else between the unlocked check and taking the lock.  If the inode
 * has no inline data at all, just clears EXT4_STATE_MAY_INLINE_DATA.
 */
1847int ext4_convert_inline_data(struct inode *inode)
1848{
1849	int error, needed_blocks;
1850	handle_t *handle;
1851	struct ext4_iloc iloc;
1852
1853	if (!ext4_has_inline_data(inode)) {
1854		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
1855		return 0;
1856	}
1857
1858	needed_blocks = ext4_writepage_trans_blocks(inode);
1859
1860	iloc.bh = NULL;
1861	error = ext4_get_inode_loc(inode, &iloc);
1862	if (error)
1863		return error;
1864
1865	handle = ext4_journal_start(inode, needed_blocks);
1866	if (IS_ERR(handle)) {
1867		error = PTR_ERR(handle);
1868		goto out_free;
1869	}
1870
1871	down_write(&EXT4_I(inode)->xattr_sem);
	/* Re-check under the lock: another task may have converted it. */
1872	if (!ext4_has_inline_data(inode)) {
1873		up_write(&EXT4_I(inode)->xattr_sem);
1874		goto out;
1875	}
1876
1877	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
1878	up_write(&EXT4_I(inode)->xattr_sem);
1879out:
1880	ext4_journal_stop(handle);
1881out_free:
1882	brelse(iloc.bh);
1883	return error;
1884}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3c243b9afa5..cb1c1ab2720b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -484,49 +484,6 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
484} 484}
485 485
486/* 486/*
487 * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
488 */
489static void set_buffers_da_mapped(struct inode *inode,
490 struct ext4_map_blocks *map)
491{
492 struct address_space *mapping = inode->i_mapping;
493 struct pagevec pvec;
494 int i, nr_pages;
495 pgoff_t index, end;
496
497 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
498 end = (map->m_lblk + map->m_len - 1) >>
499 (PAGE_CACHE_SHIFT - inode->i_blkbits);
500
501 pagevec_init(&pvec, 0);
502 while (index <= end) {
503 nr_pages = pagevec_lookup(&pvec, mapping, index,
504 min(end - index + 1,
505 (pgoff_t)PAGEVEC_SIZE));
506 if (nr_pages == 0)
507 break;
508 for (i = 0; i < nr_pages; i++) {
509 struct page *page = pvec.pages[i];
510 struct buffer_head *bh, *head;
511
512 if (unlikely(page->mapping != mapping) ||
513 !PageDirty(page))
514 break;
515
516 if (page_has_buffers(page)) {
517 bh = head = page_buffers(page);
518 do {
519 set_buffer_da_mapped(bh);
520 bh = bh->b_this_page;
521 } while (bh != head);
522 }
523 index++;
524 }
525 pagevec_release(&pvec);
526 }
527}
528
529/*
530 * The ext4_map_blocks() function tries to look up the requested blocks, 487 * The ext4_map_blocks() function tries to look up the requested blocks,
531 * and returns if the blocks are already mapped. 488 * and returns if the blocks are already mapped.
532 * 489 *
@@ -574,7 +531,16 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
574 up_read((&EXT4_I(inode)->i_data_sem)); 531 up_read((&EXT4_I(inode)->i_data_sem));
575 532
576 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 533 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
577 int ret = check_block_validity(inode, map); 534 int ret;
535 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
536 /* delayed alloc may be allocated by fallocate and
537 * coverted to initialized by directIO.
538 * we need to handle delayed extent here.
539 */
540 down_write((&EXT4_I(inode)->i_data_sem));
541 goto delayed_mapped;
542 }
543 ret = check_block_validity(inode, map);
578 if (ret != 0) 544 if (ret != 0)
579 return ret; 545 return ret;
580 } 546 }
@@ -652,12 +618,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
652 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { 618 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
653 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 619 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
654 620
655 /* If we have successfully mapped the delayed allocated blocks, 621 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
656 * set the BH_Da_Mapped bit on them. Its important to do this 622 int ret;
657 * under the protection of i_data_sem. 623delayed_mapped:
658 */ 624 /* delayed allocation blocks has been allocated */
659 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 625 ret = ext4_es_remove_extent(inode, map->m_lblk,
660 set_buffers_da_mapped(inode, map); 626 map->m_len);
627 if (ret < 0)
628 retval = ret;
629 }
661 } 630 }
662 631
663 up_write((&EXT4_I(inode)->i_data_sem)); 632 up_write((&EXT4_I(inode)->i_data_sem));
@@ -680,10 +649,13 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
680 int ret = 0, started = 0; 649 int ret = 0, started = 0;
681 int dio_credits; 650 int dio_credits;
682 651
652 if (ext4_has_inline_data(inode))
653 return -ERANGE;
654
683 map.m_lblk = iblock; 655 map.m_lblk = iblock;
684 map.m_len = bh->b_size >> inode->i_blkbits; 656 map.m_len = bh->b_size >> inode->i_blkbits;
685 657
686 if (flags && !handle) { 658 if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
687 /* Direct IO write... */ 659 /* Direct IO write... */
688 if (map.m_len > DIO_MAX_BLOCKS) 660 if (map.m_len > DIO_MAX_BLOCKS)
689 map.m_len = DIO_MAX_BLOCKS; 661 map.m_len = DIO_MAX_BLOCKS;
@@ -798,13 +770,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
798 return NULL; 770 return NULL;
799} 771}
800 772
801static int walk_page_buffers(handle_t *handle, 773int ext4_walk_page_buffers(handle_t *handle,
802 struct buffer_head *head, 774 struct buffer_head *head,
803 unsigned from, 775 unsigned from,
804 unsigned to, 776 unsigned to,
805 int *partial, 777 int *partial,
806 int (*fn)(handle_t *handle, 778 int (*fn)(handle_t *handle,
807 struct buffer_head *bh)) 779 struct buffer_head *bh))
808{ 780{
809 struct buffer_head *bh; 781 struct buffer_head *bh;
810 unsigned block_start, block_end; 782 unsigned block_start, block_end;
@@ -854,8 +826,8 @@ static int walk_page_buffers(handle_t *handle,
854 * is elevated. We'll still have enough credits for the tiny quotafile 826 * is elevated. We'll still have enough credits for the tiny quotafile
855 * write. 827 * write.
856 */ 828 */
857static int do_journal_get_write_access(handle_t *handle, 829int do_journal_get_write_access(handle_t *handle,
858 struct buffer_head *bh) 830 struct buffer_head *bh)
859{ 831{
860 int dirty = buffer_dirty(bh); 832 int dirty = buffer_dirty(bh);
861 int ret; 833 int ret;
@@ -878,7 +850,7 @@ static int do_journal_get_write_access(handle_t *handle,
878 return ret; 850 return ret;
879} 851}
880 852
881static int ext4_get_block_write(struct inode *inode, sector_t iblock, 853static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
882 struct buffer_head *bh_result, int create); 854 struct buffer_head *bh_result, int create);
883static int ext4_write_begin(struct file *file, struct address_space *mapping, 855static int ext4_write_begin(struct file *file, struct address_space *mapping,
884 loff_t pos, unsigned len, unsigned flags, 856 loff_t pos, unsigned len, unsigned flags,
@@ -902,6 +874,17 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
902 from = pos & (PAGE_CACHE_SIZE - 1); 874 from = pos & (PAGE_CACHE_SIZE - 1);
903 to = from + len; 875 to = from + len;
904 876
877 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
878 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
879 flags, pagep);
880 if (ret < 0)
881 goto out;
882 if (ret == 1) {
883 ret = 0;
884 goto out;
885 }
886 }
887
905retry: 888retry:
906 handle = ext4_journal_start(inode, needed_blocks); 889 handle = ext4_journal_start(inode, needed_blocks);
907 if (IS_ERR(handle)) { 890 if (IS_ERR(handle)) {
@@ -919,6 +902,7 @@ retry:
919 ret = -ENOMEM; 902 ret = -ENOMEM;
920 goto out; 903 goto out;
921 } 904 }
905
922 *pagep = page; 906 *pagep = page;
923 907
924 if (ext4_should_dioread_nolock(inode)) 908 if (ext4_should_dioread_nolock(inode))
@@ -927,8 +911,9 @@ retry:
927 ret = __block_write_begin(page, pos, len, ext4_get_block); 911 ret = __block_write_begin(page, pos, len, ext4_get_block);
928 912
929 if (!ret && ext4_should_journal_data(inode)) { 913 if (!ret && ext4_should_journal_data(inode)) {
930 ret = walk_page_buffers(handle, page_buffers(page), 914 ret = ext4_walk_page_buffers(handle, page_buffers(page),
931 from, to, NULL, do_journal_get_write_access); 915 from, to, NULL,
916 do_journal_get_write_access);
932 } 917 }
933 918
934 if (ret) { 919 if (ret) {
@@ -983,7 +968,12 @@ static int ext4_generic_write_end(struct file *file,
983 struct inode *inode = mapping->host; 968 struct inode *inode = mapping->host;
984 handle_t *handle = ext4_journal_current_handle(); 969 handle_t *handle = ext4_journal_current_handle();
985 970
986 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 971 if (ext4_has_inline_data(inode))
972 copied = ext4_write_inline_data_end(inode, pos, len,
973 copied, page);
974 else
975 copied = block_write_end(file, mapping, pos,
976 len, copied, page, fsdata);
987 977
988 /* 978 /*
989 * No need to use i_size_read() here, the i_size 979 * No need to use i_size_read() here, the i_size
@@ -1134,16 +1124,21 @@ static int ext4_journalled_write_end(struct file *file,
1134 1124
1135 BUG_ON(!ext4_handle_valid(handle)); 1125 BUG_ON(!ext4_handle_valid(handle));
1136 1126
1137 if (copied < len) { 1127 if (ext4_has_inline_data(inode))
1138 if (!PageUptodate(page)) 1128 copied = ext4_write_inline_data_end(inode, pos, len,
1139 copied = 0; 1129 copied, page);
1140 page_zero_new_buffers(page, from+copied, to); 1130 else {
1141 } 1131 if (copied < len) {
1132 if (!PageUptodate(page))
1133 copied = 0;
1134 page_zero_new_buffers(page, from+copied, to);
1135 }
1142 1136
1143 ret = walk_page_buffers(handle, page_buffers(page), from, 1137 ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
1144 to, &partial, write_end_fn); 1138 to, &partial, write_end_fn);
1145 if (!partial) 1139 if (!partial)
1146 SetPageUptodate(page); 1140 SetPageUptodate(page);
1141 }
1147 new_i_size = pos + copied; 1142 new_i_size = pos + copied;
1148 if (new_i_size > inode->i_size) 1143 if (new_i_size > inode->i_size)
1149 i_size_write(inode, pos+copied); 1144 i_size_write(inode, pos+copied);
@@ -1301,6 +1296,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1301 struct inode *inode = page->mapping->host; 1296 struct inode *inode = page->mapping->host;
1302 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1297 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1303 int num_clusters; 1298 int num_clusters;
1299 ext4_fsblk_t lblk;
1304 1300
1305 head = page_buffers(page); 1301 head = page_buffers(page);
1306 bh = head; 1302 bh = head;
@@ -1310,20 +1306,23 @@ static void ext4_da_page_release_reservation(struct page *page,
1310 if ((offset <= curr_off) && (buffer_delay(bh))) { 1306 if ((offset <= curr_off) && (buffer_delay(bh))) {
1311 to_release++; 1307 to_release++;
1312 clear_buffer_delay(bh); 1308 clear_buffer_delay(bh);
1313 clear_buffer_da_mapped(bh);
1314 } 1309 }
1315 curr_off = next_off; 1310 curr_off = next_off;
1316 } while ((bh = bh->b_this_page) != head); 1311 } while ((bh = bh->b_this_page) != head);
1317 1312
1313 if (to_release) {
1314 lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1315 ext4_es_remove_extent(inode, lblk, to_release);
1316 }
1317
1318 /* If we have released all the blocks belonging to a cluster, then we 1318 /* If we have released all the blocks belonging to a cluster, then we
1319 * need to release the reserved space for that cluster. */ 1319 * need to release the reserved space for that cluster. */
1320 num_clusters = EXT4_NUM_B2C(sbi, to_release); 1320 num_clusters = EXT4_NUM_B2C(sbi, to_release);
1321 while (num_clusters > 0) { 1321 while (num_clusters > 0) {
1322 ext4_fsblk_t lblk;
1323 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + 1322 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1324 ((num_clusters - 1) << sbi->s_cluster_bits); 1323 ((num_clusters - 1) << sbi->s_cluster_bits);
1325 if (sbi->s_cluster_ratio == 1 || 1324 if (sbi->s_cluster_ratio == 1 ||
1326 !ext4_find_delalloc_cluster(inode, lblk, 1)) 1325 !ext4_find_delalloc_cluster(inode, lblk))
1327 ext4_da_release_space(inode, 1); 1326 ext4_da_release_space(inode, 1);
1328 1327
1329 num_clusters--; 1328 num_clusters--;
@@ -1429,8 +1428,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1429 clear_buffer_delay(bh); 1428 clear_buffer_delay(bh);
1430 bh->b_blocknr = pblock; 1429 bh->b_blocknr = pblock;
1431 } 1430 }
1432 if (buffer_da_mapped(bh))
1433 clear_buffer_da_mapped(bh);
1434 if (buffer_unwritten(bh) || 1431 if (buffer_unwritten(bh) ||
1435 buffer_mapped(bh)) 1432 buffer_mapped(bh))
1436 BUG_ON(bh->b_blocknr != pblock); 1433 BUG_ON(bh->b_blocknr != pblock);
@@ -1500,9 +1497,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
1500 struct pagevec pvec; 1497 struct pagevec pvec;
1501 struct inode *inode = mpd->inode; 1498 struct inode *inode = mpd->inode;
1502 struct address_space *mapping = inode->i_mapping; 1499 struct address_space *mapping = inode->i_mapping;
1500 ext4_lblk_t start, last;
1503 1501
1504 index = mpd->first_page; 1502 index = mpd->first_page;
1505 end = mpd->next_page - 1; 1503 end = mpd->next_page - 1;
1504
1505 start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1506 last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1507 ext4_es_remove_extent(inode, start, last - start + 1);
1508
1509 pagevec_init(&pvec, 0);
1506 while (index <= end) { 1510 while (index <= end) {
1507 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); 1511 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1508 if (nr_pages == 0) 1512 if (nr_pages == 0)
@@ -1656,15 +1660,6 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1656 1660
1657 for (i = 0; i < map.m_len; i++) 1661 for (i = 0; i < map.m_len; i++)
1658 unmap_underlying_metadata(bdev, map.m_pblk + i); 1662 unmap_underlying_metadata(bdev, map.m_pblk + i);
1659
1660 if (ext4_should_order_data(mpd->inode)) {
1661 err = ext4_jbd2_file_inode(handle, mpd->inode);
1662 if (err) {
1663 /* Only if the journal is aborted */
1664 mpd->retval = err;
1665 goto submit_io;
1666 }
1667 }
1668 } 1663 }
1669 1664
1670 /* 1665 /*
@@ -1795,7 +1790,19 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1795 * file system block. 1790 * file system block.
1796 */ 1791 */
1797 down_read((&EXT4_I(inode)->i_data_sem)); 1792 down_read((&EXT4_I(inode)->i_data_sem));
1798 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1793 if (ext4_has_inline_data(inode)) {
1794 /*
1795 * We will soon create blocks for this page, and let
1796 * us pretend as if the blocks aren't allocated yet.
1797 * In case of clusters, we have to handle the work
1798 * of mapping from cluster so that the reserved space
1799 * is calculated properly.
1800 */
1801 if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
1802 ext4_find_delalloc_cluster(inode, map->m_lblk))
1803 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
1804 retval = 0;
1805 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1799 retval = ext4_ext_map_blocks(NULL, inode, map, 0); 1806 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
1800 else 1807 else
1801 retval = ext4_ind_map_blocks(NULL, inode, map, 0); 1808 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
@@ -1814,6 +1821,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1814 goto out_unlock; 1821 goto out_unlock;
1815 } 1822 }
1816 1823
1824 retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len);
1825 if (retval)
1826 goto out_unlock;
1827
1817 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served 1828 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1818 * and it should not appear on the bh->b_state. 1829 * and it should not appear on the bh->b_state.
1819 */ 1830 */
@@ -1842,8 +1853,8 @@ out_unlock:
1842 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev 1853 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
1843 * initialized properly. 1854 * initialized properly.
1844 */ 1855 */
1845static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 1856int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1846 struct buffer_head *bh, int create) 1857 struct buffer_head *bh, int create)
1847{ 1858{
1848 struct ext4_map_blocks map; 1859 struct ext4_map_blocks map;
1849 int ret = 0; 1860 int ret = 0;
@@ -1917,15 +1928,29 @@ static int __ext4_journalled_writepage(struct page *page,
1917{ 1928{
1918 struct address_space *mapping = page->mapping; 1929 struct address_space *mapping = page->mapping;
1919 struct inode *inode = mapping->host; 1930 struct inode *inode = mapping->host;
1920 struct buffer_head *page_bufs; 1931 struct buffer_head *page_bufs = NULL;
1921 handle_t *handle = NULL; 1932 handle_t *handle = NULL;
1922 int ret = 0; 1933 int ret = 0, err = 0;
1923 int err; 1934 int inline_data = ext4_has_inline_data(inode);
1935 struct buffer_head *inode_bh = NULL;
1924 1936
1925 ClearPageChecked(page); 1937 ClearPageChecked(page);
1926 page_bufs = page_buffers(page); 1938
1927 BUG_ON(!page_bufs); 1939 if (inline_data) {
1928 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); 1940 BUG_ON(page->index != 0);
1941 BUG_ON(len > ext4_get_max_inline_size(inode));
1942 inode_bh = ext4_journalled_write_inline_data(inode, len, page);
1943 if (inode_bh == NULL)
1944 goto out;
1945 } else {
1946 page_bufs = page_buffers(page);
1947 if (!page_bufs) {
1948 BUG();
1949 goto out;
1950 }
1951 ext4_walk_page_buffers(handle, page_bufs, 0, len,
1952 NULL, bget_one);
1953 }
1929 /* As soon as we unlock the page, it can go away, but we have 1954 /* As soon as we unlock the page, it can go away, but we have
1930 * references to buffers so we are safe */ 1955 * references to buffers so we are safe */
1931 unlock_page(page); 1956 unlock_page(page);
@@ -1938,11 +1963,18 @@ static int __ext4_journalled_writepage(struct page *page,
1938 1963
1939 BUG_ON(!ext4_handle_valid(handle)); 1964 BUG_ON(!ext4_handle_valid(handle));
1940 1965
1941 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, 1966 if (inline_data) {
1942 do_journal_get_write_access); 1967 ret = ext4_journal_get_write_access(handle, inode_bh);
1968
1969 err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
1943 1970
1944 err = walk_page_buffers(handle, page_bufs, 0, len, NULL, 1971 } else {
1945 write_end_fn); 1972 ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
1973 do_journal_get_write_access);
1974
1975 err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
1976 write_end_fn);
1977 }
1946 if (ret == 0) 1978 if (ret == 0)
1947 ret = err; 1979 ret = err;
1948 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 1980 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
@@ -1950,9 +1982,12 @@ static int __ext4_journalled_writepage(struct page *page,
1950 if (!ret) 1982 if (!ret)
1951 ret = err; 1983 ret = err;
1952 1984
1953 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); 1985 if (!ext4_has_inline_data(inode))
1986 ext4_walk_page_buffers(handle, page_bufs, 0, len,
1987 NULL, bput_one);
1954 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 1988 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1955out: 1989out:
1990 brelse(inode_bh);
1956 return ret; 1991 return ret;
1957} 1992}
1958 1993
@@ -2029,8 +2064,8 @@ static int ext4_writepage(struct page *page,
2029 commit_write = 1; 2064 commit_write = 1;
2030 } 2065 }
2031 page_bufs = page_buffers(page); 2066 page_bufs = page_buffers(page);
2032 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2067 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2033 ext4_bh_delay_or_unwritten)) { 2068 ext4_bh_delay_or_unwritten)) {
2034 /* 2069 /*
2035 * We don't want to do block allocation, so redirty 2070 * We don't want to do block allocation, so redirty
2036 * the page and return. We may reach here when we do 2071 * the page and return. We may reach here when we do
@@ -2096,7 +2131,8 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2096 * mpage_da_map_and_submit to map a single contiguous memory region 2131 * mpage_da_map_and_submit to map a single contiguous memory region
2097 * and then write them. 2132 * and then write them.
2098 */ 2133 */
2099static int write_cache_pages_da(struct address_space *mapping, 2134static int write_cache_pages_da(handle_t *handle,
2135 struct address_space *mapping,
2100 struct writeback_control *wbc, 2136 struct writeback_control *wbc,
2101 struct mpage_da_data *mpd, 2137 struct mpage_da_data *mpd,
2102 pgoff_t *done_index) 2138 pgoff_t *done_index)
@@ -2175,6 +2211,17 @@ static int write_cache_pages_da(struct address_space *mapping,
2175 wait_on_page_writeback(page); 2211 wait_on_page_writeback(page);
2176 BUG_ON(PageWriteback(page)); 2212 BUG_ON(PageWriteback(page));
2177 2213
2214 /*
2215 * If we have inline data and arrive here, it means that
2216 * we will soon create the block for the 1st page, so
2217 * we'd better clear the inline data here.
2218 */
2219 if (ext4_has_inline_data(inode)) {
2220 BUG_ON(ext4_test_inode_state(inode,
2221 EXT4_STATE_MAY_INLINE_DATA));
2222 ext4_destroy_inline_data(handle, inode);
2223 }
2224
2178 if (mpd->next_page != page->index) 2225 if (mpd->next_page != page->index)
2179 mpd->first_page = page->index; 2226 mpd->first_page = page->index;
2180 mpd->next_page = page->index + 1; 2227 mpd->next_page = page->index + 1;
@@ -2381,7 +2428,8 @@ retry:
2381 * contiguous region of logical blocks that need 2428 * contiguous region of logical blocks that need
2382 * blocks to be allocated by ext4 and submit them. 2429 * blocks to be allocated by ext4 and submit them.
2383 */ 2430 */
2384 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); 2431 ret = write_cache_pages_da(handle, mapping,
2432 wbc, &mpd, &done_index);
2385 /* 2433 /*
2386 * If we have a contiguous extent of pages and we 2434 * If we have a contiguous extent of pages and we
2387 * haven't done the I/O yet, map the blocks and submit 2435 * haven't done the I/O yet, map the blocks and submit
@@ -2445,7 +2493,6 @@ out_writepages:
2445 return ret; 2493 return ret;
2446} 2494}
2447 2495
2448#define FALL_BACK_TO_NONDELALLOC 1
2449static int ext4_nonda_switch(struct super_block *sb) 2496static int ext4_nonda_switch(struct super_block *sb)
2450{ 2497{
2451 s64 free_blocks, dirty_blocks; 2498 s64 free_blocks, dirty_blocks;
@@ -2502,6 +2549,19 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2502 } 2549 }
2503 *fsdata = (void *)0; 2550 *fsdata = (void *)0;
2504 trace_ext4_da_write_begin(inode, pos, len, flags); 2551 trace_ext4_da_write_begin(inode, pos, len, flags);
2552
2553 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2554 ret = ext4_da_write_inline_data_begin(mapping, inode,
2555 pos, len, flags,
2556 pagep, fsdata);
2557 if (ret < 0)
2558 goto out;
2559 if (ret == 1) {
2560 ret = 0;
2561 goto out;
2562 }
2563 }
2564
2505retry: 2565retry:
2506 /* 2566 /*
2507 * With delayed allocation, we don't log the i_disksize update 2567 * With delayed allocation, we don't log the i_disksize update
@@ -2603,22 +2663,13 @@ static int ext4_da_write_end(struct file *file,
2603 * changes. So let's piggyback the i_disksize mark_inode_dirty 2663 * changes. So let's piggyback the i_disksize mark_inode_dirty
2604 * into that. 2664 * into that.
2605 */ 2665 */
2606
2607 new_i_size = pos + copied; 2666 new_i_size = pos + copied;
2608 if (copied && new_i_size > EXT4_I(inode)->i_disksize) { 2667 if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
2609 if (ext4_da_should_update_i_disksize(page, end)) { 2668 if (ext4_has_inline_data(inode) ||
2669 ext4_da_should_update_i_disksize(page, end)) {
2610 down_write(&EXT4_I(inode)->i_data_sem); 2670 down_write(&EXT4_I(inode)->i_data_sem);
2611 if (new_i_size > EXT4_I(inode)->i_disksize) { 2671 if (new_i_size > EXT4_I(inode)->i_disksize)
2612 /*
2613 * Updating i_disksize when extending file
2614 * without needing block allocation
2615 */
2616 if (ext4_should_order_data(inode))
2617 ret = ext4_jbd2_file_inode(handle,
2618 inode);
2619
2620 EXT4_I(inode)->i_disksize = new_i_size; 2672 EXT4_I(inode)->i_disksize = new_i_size;
2621 }
2622 up_write(&EXT4_I(inode)->i_data_sem); 2673 up_write(&EXT4_I(inode)->i_data_sem);
2623 /* We need to mark inode dirty even if 2674 /* We need to mark inode dirty even if
2624 * new_i_size is less that inode->i_size 2675 * new_i_size is less that inode->i_size
@@ -2627,8 +2678,16 @@ static int ext4_da_write_end(struct file *file,
2627 ext4_mark_inode_dirty(handle, inode); 2678 ext4_mark_inode_dirty(handle, inode);
2628 } 2679 }
2629 } 2680 }
2630 ret2 = generic_write_end(file, mapping, pos, len, copied, 2681
2682 if (write_mode != CONVERT_INLINE_DATA &&
2683 ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
2684 ext4_has_inline_data(inode))
2685 ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
2686 page);
2687 else
2688 ret2 = generic_write_end(file, mapping, pos, len, copied,
2631 page, fsdata); 2689 page, fsdata);
2690
2632 copied = ret2; 2691 copied = ret2;
2633 if (ret2 < 0) 2692 if (ret2 < 0)
2634 ret = ret2; 2693 ret = ret2;
@@ -2721,6 +2780,12 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2721 journal_t *journal; 2780 journal_t *journal;
2722 int err; 2781 int err;
2723 2782
2783 /*
2784 * We can get here for an inline file via the FIBMAP ioctl
2785 */
2786 if (ext4_has_inline_data(inode))
2787 return 0;
2788
2724 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && 2789 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
2725 test_opt(inode->i_sb, DELALLOC)) { 2790 test_opt(inode->i_sb, DELALLOC)) {
2726 /* 2791 /*
@@ -2766,14 +2831,30 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2766 2831
2767static int ext4_readpage(struct file *file, struct page *page) 2832static int ext4_readpage(struct file *file, struct page *page)
2768{ 2833{
2834 int ret = -EAGAIN;
2835 struct inode *inode = page->mapping->host;
2836
2769 trace_ext4_readpage(page); 2837 trace_ext4_readpage(page);
2770 return mpage_readpage(page, ext4_get_block); 2838
2839 if (ext4_has_inline_data(inode))
2840 ret = ext4_readpage_inline(inode, page);
2841
2842 if (ret == -EAGAIN)
2843 return mpage_readpage(page, ext4_get_block);
2844
2845 return ret;
2771} 2846}
2772 2847
2773static int 2848static int
2774ext4_readpages(struct file *file, struct address_space *mapping, 2849ext4_readpages(struct file *file, struct address_space *mapping,
2775 struct list_head *pages, unsigned nr_pages) 2850 struct list_head *pages, unsigned nr_pages)
2776{ 2851{
2852 struct inode *inode = mapping->host;
2853
2854 /* If the file has inline data, no need to do readpages. */
2855 if (ext4_has_inline_data(inode))
2856 return 0;
2857
2777 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 2858 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
2778} 2859}
2779 2860
@@ -2840,7 +2921,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
2840 * We allocate an uinitialized extent if blocks haven't been allocated. 2921 * We allocate an uinitialized extent if blocks haven't been allocated.
2841 * The extent will be converted to initialized after the IO is complete. 2922 * The extent will be converted to initialized after the IO is complete.
2842 */ 2923 */
2843static int ext4_get_block_write(struct inode *inode, sector_t iblock, 2924int ext4_get_block_write(struct inode *inode, sector_t iblock,
2844 struct buffer_head *bh_result, int create) 2925 struct buffer_head *bh_result, int create)
2845{ 2926{
2846 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", 2927 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
@@ -2850,29 +2931,12 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
2850} 2931}
2851 2932
2852static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, 2933static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
2853 struct buffer_head *bh_result, int flags) 2934 struct buffer_head *bh_result, int create)
2854{ 2935{
2855 handle_t *handle = ext4_journal_current_handle(); 2936 ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
2856 struct ext4_map_blocks map; 2937 inode->i_ino, create);
2857 int ret = 0; 2938 return _ext4_get_block(inode, iblock, bh_result,
2858 2939 EXT4_GET_BLOCKS_NO_LOCK);
2859 ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
2860 inode->i_ino, flags);
2861
2862 flags = EXT4_GET_BLOCKS_NO_LOCK;
2863
2864 map.m_lblk = iblock;
2865 map.m_len = bh_result->b_size >> inode->i_blkbits;
2866
2867 ret = ext4_map_blocks(handle, inode, &map, flags);
2868 if (ret > 0) {
2869 map_bh(bh_result, inode->i_sb, map.m_pblk);
2870 bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
2871 map.m_flags;
2872 bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;
2873 ret = 0;
2874 }
2875 return ret;
2876} 2940}
2877 2941
2878static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 2942static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -2978,10 +3042,10 @@ retry:
2978 * fall back to buffered IO. 3042 * fall back to buffered IO.
2979 * 3043 *
2980 * For holes, we fallocate those blocks, mark them as uninitialized 3044 * For holes, we fallocate those blocks, mark them as uninitialized
2981 * If those blocks were preallocated, we mark sure they are splited, but 3045 * If those blocks were preallocated, we mark sure they are split, but
2982 * still keep the range to write as uninitialized. 3046 * still keep the range to write as uninitialized.
2983 * 3047 *
2984 * The unwrritten extents will be converted to written when DIO is completed. 3048 * The unwritten extents will be converted to written when DIO is completed.
2985 * For async direct IO, since the IO may still pending when return, we 3049 * For async direct IO, since the IO may still pending when return, we
2986 * set up an end_io call back function, which will do the conversion 3050 * set up an end_io call back function, which will do the conversion
2987 * when async direct IO completed. 3051 * when async direct IO completed.
@@ -2999,125 +3063,120 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2999 struct inode *inode = file->f_mapping->host; 3063 struct inode *inode = file->f_mapping->host;
3000 ssize_t ret; 3064 ssize_t ret;
3001 size_t count = iov_length(iov, nr_segs); 3065 size_t count = iov_length(iov, nr_segs);
3002 3066 int overwrite = 0;
3067 get_block_t *get_block_func = NULL;
3068 int dio_flags = 0;
3003 loff_t final_size = offset + count; 3069 loff_t final_size = offset + count;
3004 if (rw == WRITE && final_size <= inode->i_size) {
3005 int overwrite = 0;
3006 3070
3007 BUG_ON(iocb->private == NULL); 3071 /* Use the old path for reads and writes beyond i_size. */
3072 if (rw != WRITE || final_size > inode->i_size)
3073 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3008 3074
3009 /* If we do a overwrite dio, i_mutex locking can be released */ 3075 BUG_ON(iocb->private == NULL);
3010 overwrite = *((int *)iocb->private);
3011 3076
3012 if (overwrite) { 3077 /* If we do a overwrite dio, i_mutex locking can be released */
3013 atomic_inc(&inode->i_dio_count); 3078 overwrite = *((int *)iocb->private);
3014 down_read(&EXT4_I(inode)->i_data_sem);
3015 mutex_unlock(&inode->i_mutex);
3016 }
3017 3079
3018 /* 3080 if (overwrite) {
3019 * We could direct write to holes and fallocate. 3081 atomic_inc(&inode->i_dio_count);
3020 * 3082 down_read(&EXT4_I(inode)->i_data_sem);
3021 * Allocated blocks to fill the hole are marked as uninitialized 3083 mutex_unlock(&inode->i_mutex);
3022 * to prevent parallel buffered read to expose the stale data 3084 }
3023 * before DIO complete the data IO.
3024 *
3025 * As to previously fallocated extents, ext4 get_block
3026 * will just simply mark the buffer mapped but still
3027 * keep the extents uninitialized.
3028 *
3029 * for non AIO case, we will convert those unwritten extents
3030 * to written after return back from blockdev_direct_IO.
3031 *
3032 * for async DIO, the conversion needs to be defered when
3033 * the IO is completed. The ext4 end_io callback function
3034 * will be called to take care of the conversion work.
3035 * Here for async case, we allocate an io_end structure to
3036 * hook to the iocb.
3037 */
3038 iocb->private = NULL;
3039 ext4_inode_aio_set(inode, NULL);
3040 if (!is_sync_kiocb(iocb)) {
3041 ext4_io_end_t *io_end =
3042 ext4_init_io_end(inode, GFP_NOFS);
3043 if (!io_end) {
3044 ret = -ENOMEM;
3045 goto retake_lock;
3046 }
3047 io_end->flag |= EXT4_IO_END_DIRECT;
3048 iocb->private = io_end;
3049 /*
3050 * we save the io structure for current async
3051 * direct IO, so that later ext4_map_blocks()
3052 * could flag the io structure whether there
3053 * is a unwritten extents needs to be converted
3054 * when IO is completed.
3055 */
3056 ext4_inode_aio_set(inode, io_end);
3057 }
3058 3085
3059 if (overwrite) 3086 /*
3060 ret = __blockdev_direct_IO(rw, iocb, inode, 3087 * We could direct write to holes and fallocate.
3061 inode->i_sb->s_bdev, iov, 3088 *
3062 offset, nr_segs, 3089 * Allocated blocks to fill the hole are marked as
3063 ext4_get_block_write_nolock, 3090 * uninitialized to prevent parallel buffered read to expose
3064 ext4_end_io_dio, 3091 * the stale data before DIO complete the data IO.
3065 NULL, 3092 *
3066 0); 3093 * As to previously fallocated extents, ext4 get_block will
3067 else 3094 * just simply mark the buffer mapped but still keep the
3068 ret = __blockdev_direct_IO(rw, iocb, inode, 3095 * extents uninitialized.
3069 inode->i_sb->s_bdev, iov, 3096 *
3070 offset, nr_segs, 3097 * For non AIO case, we will convert those unwritten extents
3071 ext4_get_block_write, 3098 * to written after return back from blockdev_direct_IO.
3072 ext4_end_io_dio, 3099 *
3073 NULL, 3100 * For async DIO, the conversion needs to be deferred when the
3074 DIO_LOCKING); 3101 * IO is completed. The ext4 end_io callback function will be
3075 if (iocb->private) 3102 * called to take care of the conversion work. Here for async
3076 ext4_inode_aio_set(inode, NULL); 3103 * case, we allocate an io_end structure to hook to the iocb.
3104 */
3105 iocb->private = NULL;
3106 ext4_inode_aio_set(inode, NULL);
3107 if (!is_sync_kiocb(iocb)) {
3108 ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
3109 if (!io_end) {
3110 ret = -ENOMEM;
3111 goto retake_lock;
3112 }
3113 io_end->flag |= EXT4_IO_END_DIRECT;
3114 iocb->private = io_end;
3077 /* 3115 /*
3078 * The io_end structure takes a reference to the inode, 3116 * we save the io structure for current async direct
3079 * that structure needs to be destroyed and the 3117 * IO, so that later ext4_map_blocks() could flag the
3080 * reference to the inode need to be dropped, when IO is 3118 * io structure whether there is a unwritten extents
3081 * complete, even with 0 byte write, or failed. 3119 * needs to be converted when IO is completed.
3082 *
3083 * In the successful AIO DIO case, the io_end structure will be
3084 * desctroyed and the reference to the inode will be dropped
3085 * after the end_io call back function is called.
3086 *
3087 * In the case there is 0 byte write, or error case, since
3088 * VFS direct IO won't invoke the end_io call back function,
3089 * we need to free the end_io structure here.
3090 */ 3120 */
3091 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3121 ext4_inode_aio_set(inode, io_end);
3092 ext4_free_io_end(iocb->private); 3122 }
3093 iocb->private = NULL;
3094 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3095 EXT4_STATE_DIO_UNWRITTEN)) {
3096 int err;
3097 /*
3098 * for non AIO case, since the IO is already
3099 * completed, we could do the conversion right here
3100 */
3101 err = ext4_convert_unwritten_extents(inode,
3102 offset, ret);
3103 if (err < 0)
3104 ret = err;
3105 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3106 }
3107 3123
3108 retake_lock: 3124 if (overwrite) {
3109 /* take i_mutex locking again if we do a ovewrite dio */ 3125 get_block_func = ext4_get_block_write_nolock;
3110 if (overwrite) { 3126 } else {
3111 inode_dio_done(inode); 3127 get_block_func = ext4_get_block_write;
3112 up_read(&EXT4_I(inode)->i_data_sem); 3128 dio_flags = DIO_LOCKING;
3113 mutex_lock(&inode->i_mutex); 3129 }
3114 } 3130 ret = __blockdev_direct_IO(rw, iocb, inode,
3131 inode->i_sb->s_bdev, iov,
3132 offset, nr_segs,
3133 get_block_func,
3134 ext4_end_io_dio,
3135 NULL,
3136 dio_flags);
3137
3138 if (iocb->private)
3139 ext4_inode_aio_set(inode, NULL);
3140 /*
3141 * The io_end structure takes a reference to the inode, that
3142 * structure needs to be destroyed and the reference to the
3143 * inode need to be dropped, when IO is complete, even with 0
3144 * byte write, or failed.
3145 *
3146 * In the successful AIO DIO case, the io_end structure will
3147 * be destroyed and the reference to the inode will be dropped
3148 * after the end_io call back function is called.
3149 *
3150 * In the case there is 0 byte write, or error case, since VFS
3151 * direct IO won't invoke the end_io call back function, we
3152 * need to free the end_io structure here.
3153 */
3154 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3155 ext4_free_io_end(iocb->private);
3156 iocb->private = NULL;
3157 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3158 EXT4_STATE_DIO_UNWRITTEN)) {
3159 int err;
3160 /*
3161 * for non AIO case, since the IO is already
3162 * completed, we could do the conversion right here
3163 */
3164 err = ext4_convert_unwritten_extents(inode,
3165 offset, ret);
3166 if (err < 0)
3167 ret = err;
3168 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3169 }
3115 3170
3116 return ret; 3171retake_lock:
3172 /* take i_mutex locking again if we do a ovewrite dio */
3173 if (overwrite) {
3174 inode_dio_done(inode);
3175 up_read(&EXT4_I(inode)->i_data_sem);
3176 mutex_lock(&inode->i_mutex);
3117 } 3177 }
3118 3178
3119 /* for write the the end of file case, we fall back to old way */ 3179 return ret;
3120 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3121} 3180}
3122 3181
3123static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3182static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
@@ -3134,6 +3193,10 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3134 if (ext4_should_journal_data(inode)) 3193 if (ext4_should_journal_data(inode))
3135 return 0; 3194 return 0;
3136 3195
3196 /* Let buffer I/O handle the inline data case. */
3197 if (ext4_has_inline_data(inode))
3198 return 0;
3199
3137 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); 3200 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
3138 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3201 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3139 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3202 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -3531,6 +3594,14 @@ void ext4_truncate(struct inode *inode)
3531 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 3594 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3532 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); 3595 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
3533 3596
3597 if (ext4_has_inline_data(inode)) {
3598 int has_inline = 1;
3599
3600 ext4_inline_data_truncate(inode, &has_inline);
3601 if (has_inline)
3602 return;
3603 }
3604
3534 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3605 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3535 ext4_ext_truncate(inode); 3606 ext4_ext_truncate(inode);
3536 else 3607 else
@@ -3756,6 +3827,19 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
3756 } 3827 }
3757} 3828}
3758 3829
3830static inline void ext4_iget_extra_inode(struct inode *inode,
3831 struct ext4_inode *raw_inode,
3832 struct ext4_inode_info *ei)
3833{
3834 __le32 *magic = (void *)raw_inode +
3835 EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
3836 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
3837 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
3838 ext4_find_inline_data_nolock(inode);
3839 } else
3840 EXT4_I(inode)->i_inline_off = 0;
3841}
3842
3759struct inode *ext4_iget(struct super_block *sb, unsigned long ino) 3843struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3760{ 3844{
3761 struct ext4_iloc iloc; 3845 struct ext4_iloc iloc;
@@ -3826,6 +3910,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3826 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 3910 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
3827 3911
3828 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 3912 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
3913 ei->i_inline_off = 0;
3829 ei->i_dir_start_lookup = 0; 3914 ei->i_dir_start_lookup = 0;
3830 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 3915 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
3831 /* We now have enough fields to check if the inode was active or not. 3916 /* We now have enough fields to check if the inode was active or not.
@@ -3898,11 +3983,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3898 ei->i_extra_isize = sizeof(struct ext4_inode) - 3983 ei->i_extra_isize = sizeof(struct ext4_inode) -
3899 EXT4_GOOD_OLD_INODE_SIZE; 3984 EXT4_GOOD_OLD_INODE_SIZE;
3900 } else { 3985 } else {
3901 __le32 *magic = (void *)raw_inode + 3986 ext4_iget_extra_inode(inode, raw_inode, ei);
3902 EXT4_GOOD_OLD_INODE_SIZE +
3903 ei->i_extra_isize;
3904 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
3905 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
3906 } 3987 }
3907 } 3988 }
3908 3989
@@ -3925,17 +4006,19 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3925 ei->i_file_acl); 4006 ei->i_file_acl);
3926 ret = -EIO; 4007 ret = -EIO;
3927 goto bad_inode; 4008 goto bad_inode;
3928 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 4009 } else if (!ext4_has_inline_data(inode)) {
3929 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4010 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
3930 (S_ISLNK(inode->i_mode) && 4011 if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
3931 !ext4_inode_is_fast_symlink(inode))) 4012 (S_ISLNK(inode->i_mode) &&
3932 /* Validate extent which is part of inode */ 4013 !ext4_inode_is_fast_symlink(inode))))
3933 ret = ext4_ext_check_inode(inode); 4014 /* Validate extent which is part of inode */
3934 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4015 ret = ext4_ext_check_inode(inode);
3935 (S_ISLNK(inode->i_mode) && 4016 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
3936 !ext4_inode_is_fast_symlink(inode))) { 4017 (S_ISLNK(inode->i_mode) &&
3937 /* Validate block references which are part of inode */ 4018 !ext4_inode_is_fast_symlink(inode))) {
3938 ret = ext4_ind_check_inode(inode); 4019 /* Validate block references which are part of inode */
4020 ret = ext4_ind_check_inode(inode);
4021 }
3939 } 4022 }
3940 if (ret) 4023 if (ret)
3941 goto bad_inode; 4024 goto bad_inode;
@@ -4122,9 +4205,10 @@ static int ext4_do_update_inode(handle_t *handle,
4122 cpu_to_le32(new_encode_dev(inode->i_rdev)); 4205 cpu_to_le32(new_encode_dev(inode->i_rdev));
4123 raw_inode->i_block[2] = 0; 4206 raw_inode->i_block[2] = 0;
4124 } 4207 }
4125 } else 4208 } else if (!ext4_has_inline_data(inode)) {
4126 for (block = 0; block < EXT4_N_BLOCKS; block++) 4209 for (block = 0; block < EXT4_N_BLOCKS; block++)
4127 raw_inode->i_block[block] = ei->i_data[block]; 4210 raw_inode->i_block[block] = ei->i_data[block];
4211 }
4128 4212
4129 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4213 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4130 if (ei->i_extra_isize) { 4214 if (ei->i_extra_isize) {
@@ -4811,8 +4895,9 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4811 * journal_start/journal_stop which can block and take a long time 4895 * journal_start/journal_stop which can block and take a long time
4812 */ 4896 */
4813 if (page_has_buffers(page)) { 4897 if (page_has_buffers(page)) {
4814 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 4898 if (!ext4_walk_page_buffers(NULL, page_buffers(page),
4815 ext4_bh_unmapped)) { 4899 0, len, NULL,
4900 ext4_bh_unmapped)) {
4816 /* Wait so that we don't change page under IO */ 4901 /* Wait so that we don't change page under IO */
4817 wait_on_page_writeback(page); 4902 wait_on_page_writeback(page);
4818 ret = VM_FAULT_LOCKED; 4903 ret = VM_FAULT_LOCKED;
@@ -4833,7 +4918,7 @@ retry_alloc:
4833 } 4918 }
4834 ret = __block_page_mkwrite(vma, vmf, get_block); 4919 ret = __block_page_mkwrite(vma, vmf, get_block);
4835 if (!ret && ext4_should_journal_data(inode)) { 4920 if (!ret && ext4_should_journal_data(inode)) {
4836 if (walk_page_buffers(handle, page_buffers(page), 0, 4921 if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
4837 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { 4922 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
4838 unlock_page(page); 4923 unlock_page(page);
4839 ret = VM_FAULT_SIGBUS; 4924 ret = VM_FAULT_SIGBUS;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 526e55358606..1bf6fe785c4f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1373,7 +1373,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
1373 ex->fe_start += next; 1373 ex->fe_start += next;
1374 1374
1375 while (needed > ex->fe_len && 1375 while (needed > ex->fe_len &&
1376 (buddy = mb_find_buddy(e4b, order, &max))) { 1376 mb_find_buddy(e4b, order, &max)) {
1377 1377
1378 if (block + 1 >= max) 1378 if (block + 1 >= max)
1379 break; 1379 break;
@@ -2607,9 +2607,17 @@ static void ext4_free_data_callback(struct super_block *sb,
2607 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2607 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2608 entry->efd_count, entry->efd_group, entry); 2608 entry->efd_count, entry->efd_group, entry);
2609 2609
2610 if (test_opt(sb, DISCARD)) 2610 if (test_opt(sb, DISCARD)) {
2611 ext4_issue_discard(sb, entry->efd_group, 2611 err = ext4_issue_discard(sb, entry->efd_group,
2612 entry->efd_start_cluster, entry->efd_count); 2612 entry->efd_start_cluster,
2613 entry->efd_count);
2614 if (err && err != -EOPNOTSUPP)
2615 ext4_msg(sb, KERN_WARNING, "discard request in"
2616 " group:%d block:%d count:%d failed"
2617 " with %d", entry->efd_group,
2618 entry->efd_start_cluster,
2619 entry->efd_count, err);
2620 }
2613 2621
2614 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); 2622 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2615 /* we expect to find existing buddy because it's pinned */ 2623 /* we expect to find existing buddy because it's pinned */
@@ -4310,8 +4318,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4310repeat: 4318repeat:
4311 /* allocate space in core */ 4319 /* allocate space in core */
4312 *errp = ext4_mb_regular_allocator(ac); 4320 *errp = ext4_mb_regular_allocator(ac);
4313 if (*errp) 4321 if (*errp) {
4322 ext4_discard_allocated_blocks(ac);
4314 goto errout; 4323 goto errout;
4324 }
4315 4325
4316 /* as we've just preallocated more space than 4326 /* as we've just preallocated more space than
4317 * user requested orinally, we store allocated 4327 * user requested orinally, we store allocated
@@ -4333,10 +4343,10 @@ repeat:
4333 ac->ac_b_ex.fe_len = 0; 4343 ac->ac_b_ex.fe_len = 0;
4334 ac->ac_status = AC_STATUS_CONTINUE; 4344 ac->ac_status = AC_STATUS_CONTINUE;
4335 goto repeat; 4345 goto repeat;
4336 } else if (*errp) 4346 } else if (*errp) {
4337 errout:
4338 ext4_discard_allocated_blocks(ac); 4347 ext4_discard_allocated_blocks(ac);
4339 else { 4348 goto errout;
4349 } else {
4340 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 4350 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4341 ar->len = ac->ac_b_ex.fe_len; 4351 ar->len = ac->ac_b_ex.fe_len;
4342 } 4352 }
@@ -4347,6 +4357,7 @@ repeat:
4347 *errp = -ENOSPC; 4357 *errp = -ENOSPC;
4348 } 4358 }
4349 4359
4360errout:
4350 if (*errp) { 4361 if (*errp) {
4351 ac->ac_b_ex.fe_len = 0; 4362 ac->ac_b_ex.fe_len = 0;
4352 ar->len = 0; 4363 ar->len = 0;
@@ -4656,8 +4667,16 @@ do_more:
4656 * with group lock held. generate_buddy look at 4667 * with group lock held. generate_buddy look at
4657 * them with group lock_held 4668 * them with group lock_held
4658 */ 4669 */
4659 if (test_opt(sb, DISCARD)) 4670 if (test_opt(sb, DISCARD)) {
4660 ext4_issue_discard(sb, block_group, bit, count); 4671 err = ext4_issue_discard(sb, block_group, bit, count);
4672 if (err && err != -EOPNOTSUPP)
4673 ext4_msg(sb, KERN_WARNING, "discard request in"
4674 " group:%d block:%d count:%lu failed"
4675 " with %d", block_group, bit, count,
4676 err);
4677 }
4678
4679
4661 ext4_lock_group(sb, block_group); 4680 ext4_lock_group(sb, block_group);
4662 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 4681 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4663 mb_free_blocks(inode, &e4b, bit, count_clusters); 4682 mb_free_blocks(inode, &e4b, bit, count_clusters);
@@ -4851,10 +4870,11 @@ error_return:
4851 * one will allocate those blocks, mark it as used in buddy bitmap. This must 4870 * one will allocate those blocks, mark it as used in buddy bitmap. This must
4852 * be called with under the group lock. 4871 * be called with under the group lock.
4853 */ 4872 */
4854static void ext4_trim_extent(struct super_block *sb, int start, int count, 4873static int ext4_trim_extent(struct super_block *sb, int start, int count,
4855 ext4_group_t group, struct ext4_buddy *e4b) 4874 ext4_group_t group, struct ext4_buddy *e4b)
4856{ 4875{
4857 struct ext4_free_extent ex; 4876 struct ext4_free_extent ex;
4877 int ret = 0;
4858 4878
4859 trace_ext4_trim_extent(sb, group, start, count); 4879 trace_ext4_trim_extent(sb, group, start, count);
4860 4880
@@ -4870,9 +4890,10 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
4870 */ 4890 */
4871 mb_mark_used(e4b, &ex); 4891 mb_mark_used(e4b, &ex);
4872 ext4_unlock_group(sb, group); 4892 ext4_unlock_group(sb, group);
4873 ext4_issue_discard(sb, group, start, count); 4893 ret = ext4_issue_discard(sb, group, start, count);
4874 ext4_lock_group(sb, group); 4894 ext4_lock_group(sb, group);
4875 mb_free_blocks(NULL, e4b, start, ex.fe_len); 4895 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4896 return ret;
4876} 4897}
4877 4898
4878/** 4899/**
@@ -4901,7 +4922,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4901 void *bitmap; 4922 void *bitmap;
4902 ext4_grpblk_t next, count = 0, free_count = 0; 4923 ext4_grpblk_t next, count = 0, free_count = 0;
4903 struct ext4_buddy e4b; 4924 struct ext4_buddy e4b;
4904 int ret; 4925 int ret = 0;
4905 4926
4906 trace_ext4_trim_all_free(sb, group, start, max); 4927 trace_ext4_trim_all_free(sb, group, start, max);
4907 4928
@@ -4928,8 +4949,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4928 next = mb_find_next_bit(bitmap, max + 1, start); 4949 next = mb_find_next_bit(bitmap, max + 1, start);
4929 4950
4930 if ((next - start) >= minblocks) { 4951 if ((next - start) >= minblocks) {
4931 ext4_trim_extent(sb, start, 4952 ret = ext4_trim_extent(sb, start,
4932 next - start, group, &e4b); 4953 next - start, group, &e4b);
4954 if (ret && ret != -EOPNOTSUPP)
4955 break;
4956 ret = 0;
4933 count += next - start; 4957 count += next - start;
4934 } 4958 }
4935 free_count += next - start; 4959 free_count += next - start;
@@ -4950,8 +4974,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4950 break; 4974 break;
4951 } 4975 }
4952 4976
4953 if (!ret) 4977 if (!ret) {
4978 ret = count;
4954 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); 4979 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
4980 }
4955out: 4981out:
4956 ext4_unlock_group(sb, group); 4982 ext4_unlock_group(sb, group);
4957 ext4_mb_unload_buddy(&e4b); 4983 ext4_mb_unload_buddy(&e4b);
@@ -4959,7 +4985,7 @@ out:
4959 ext4_debug("trimmed %d blocks in the group %d\n", 4985 ext4_debug("trimmed %d blocks in the group %d\n",
4960 count, group); 4986 count, group);
4961 4987
4962 return count; 4988 return ret;
4963} 4989}
4964 4990
4965/** 4991/**
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f1bb32ec0169..db8226d595fa 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include "ext4_jbd2.h" 16#include "ext4_jbd2.h"
17#include "ext4_extents.h"
17 18
18/* 19/*
19 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 292daeeed455..d9cc5ee42f53 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
20#include "ext4.h" 20#include "ext4.h"
21#include "ext4_extents.h"
21 22
22/** 23/**
23 * get_ext_path - Find an extent path for designated logical block number. 24 * get_ext_path - Find an extent path for designated logical block number.
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6d600a69fc9d..cac448282331 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -202,13 +202,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
202 struct inode *inode); 202 struct inode *inode);
203 203
204/* checksumming functions */ 204/* checksumming functions */
205#define EXT4_DIRENT_TAIL(block, blocksize) \ 205void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
206 ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ 206 unsigned int blocksize)
207 ((blocksize) - \
208 sizeof(struct ext4_dir_entry_tail))))
209
210static void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
211 unsigned int blocksize)
212{ 207{
213 memset(t, 0, sizeof(struct ext4_dir_entry_tail)); 208 memset(t, 0, sizeof(struct ext4_dir_entry_tail));
214 t->det_rec_len = ext4_rec_len_to_disk( 209 t->det_rec_len = ext4_rec_len_to_disk(
@@ -261,6 +256,12 @@ static __le32 ext4_dirent_csum(struct inode *inode,
261 return cpu_to_le32(csum); 256 return cpu_to_le32(csum);
262} 257}
263 258
259static void warn_no_space_for_csum(struct inode *inode)
260{
261 ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
262 "checksum. Please run e2fsck -D.", inode->i_ino);
263}
264
264int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) 265int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
265{ 266{
266 struct ext4_dir_entry_tail *t; 267 struct ext4_dir_entry_tail *t;
@@ -271,8 +272,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
271 272
272 t = get_dirent_tail(inode, dirent); 273 t = get_dirent_tail(inode, dirent);
273 if (!t) { 274 if (!t) {
274 EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " 275 warn_no_space_for_csum(inode);
275 "leaf for checksum. Please run e2fsck -D.");
276 return 0; 276 return 0;
277 } 277 }
278 278
@@ -294,8 +294,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
294 294
295 t = get_dirent_tail(inode, dirent); 295 t = get_dirent_tail(inode, dirent);
296 if (!t) { 296 if (!t) {
297 EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " 297 warn_no_space_for_csum(inode);
298 "leaf for checksum. Please run e2fsck -D.");
299 return; 298 return;
300 } 299 }
301 300
@@ -303,9 +302,9 @@ static void ext4_dirent_csum_set(struct inode *inode,
303 (void *)t - (void *)dirent); 302 (void *)t - (void *)dirent);
304} 303}
305 304
306static inline int ext4_handle_dirty_dirent_node(handle_t *handle, 305int ext4_handle_dirty_dirent_node(handle_t *handle,
307 struct inode *inode, 306 struct inode *inode,
308 struct buffer_head *bh) 307 struct buffer_head *bh)
309{ 308{
310 ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); 309 ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
311 return ext4_handle_dirty_metadata(handle, inode, bh); 310 return ext4_handle_dirty_metadata(handle, inode, bh);
@@ -377,8 +376,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
377 count = le16_to_cpu(c->count); 376 count = le16_to_cpu(c->count);
378 if (count_offset + (limit * sizeof(struct dx_entry)) > 377 if (count_offset + (limit * sizeof(struct dx_entry)) >
379 EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { 378 EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
380 EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " 379 warn_no_space_for_csum(inode);
381 "tree checksum found. Run e2fsck -D.");
382 return 1; 380 return 1;
383 } 381 }
384 t = (struct dx_tail *)(((struct dx_entry *)c) + limit); 382 t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
@@ -408,8 +406,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
408 count = le16_to_cpu(c->count); 406 count = le16_to_cpu(c->count);
409 if (count_offset + (limit * sizeof(struct dx_entry)) > 407 if (count_offset + (limit * sizeof(struct dx_entry)) >
410 EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { 408 EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
411 EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " 409 warn_no_space_for_csum(inode);
412 "tree checksum. Run e2fsck -D.");
413 return; 410 return;
414 } 411 }
415 t = (struct dx_tail *)(((struct dx_entry *)c) + limit); 412 t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
@@ -890,6 +887,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
890 EXT4_DIR_REC_LEN(0)); 887 EXT4_DIR_REC_LEN(0));
891 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { 888 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
892 if (ext4_check_dir_entry(dir, NULL, de, bh, 889 if (ext4_check_dir_entry(dir, NULL, de, bh,
890 bh->b_data, bh->b_size,
893 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) 891 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
894 + ((char *)de - bh->b_data))) { 892 + ((char *)de - bh->b_data))) {
895 /* On error, skip the f_pos to the next block. */ 893 /* On error, skip the f_pos to the next block. */
@@ -1007,6 +1005,15 @@ errout:
1007 return (err); 1005 return (err);
1008} 1006}
1009 1007
1008static inline int search_dirblock(struct buffer_head *bh,
1009 struct inode *dir,
1010 const struct qstr *d_name,
1011 unsigned int offset,
1012 struct ext4_dir_entry_2 **res_dir)
1013{
1014 return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1015 d_name, offset, res_dir);
1016}
1010 1017
1011/* 1018/*
1012 * Directory block splitting, compacting 1019 * Directory block splitting, compacting
@@ -1081,13 +1088,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1081 dx_set_count(entries, count + 1); 1088 dx_set_count(entries, count + 1);
1082} 1089}
1083 1090
1084static void ext4_update_dx_flag(struct inode *inode)
1085{
1086 if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
1087 EXT4_FEATURE_COMPAT_DIR_INDEX))
1088 ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
1089}
1090
1091/* 1091/*
1092 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure. 1092 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1093 * 1093 *
@@ -1107,11 +1107,13 @@ static inline int ext4_match (int len, const char * const name,
1107/* 1107/*
1108 * Returns 0 if not found, -1 on failure, and 1 on success 1108 * Returns 0 if not found, -1 on failure, and 1 on success
1109 */ 1109 */
1110static inline int search_dirblock(struct buffer_head *bh, 1110int search_dir(struct buffer_head *bh,
1111 struct inode *dir, 1111 char *search_buf,
1112 const struct qstr *d_name, 1112 int buf_size,
1113 unsigned int offset, 1113 struct inode *dir,
1114 struct ext4_dir_entry_2 ** res_dir) 1114 const struct qstr *d_name,
1115 unsigned int offset,
1116 struct ext4_dir_entry_2 **res_dir)
1115{ 1117{
1116 struct ext4_dir_entry_2 * de; 1118 struct ext4_dir_entry_2 * de;
1117 char * dlimit; 1119 char * dlimit;
@@ -1119,8 +1121,8 @@ static inline int search_dirblock(struct buffer_head *bh,
1119 const char *name = d_name->name; 1121 const char *name = d_name->name;
1120 int namelen = d_name->len; 1122 int namelen = d_name->len;
1121 1123
1122 de = (struct ext4_dir_entry_2 *) bh->b_data; 1124 de = (struct ext4_dir_entry_2 *)search_buf;
1123 dlimit = bh->b_data + dir->i_sb->s_blocksize; 1125 dlimit = search_buf + buf_size;
1124 while ((char *) de < dlimit) { 1126 while ((char *) de < dlimit) {
1125 /* this code is executed quadratically often */ 1127 /* this code is executed quadratically often */
1126 /* do minimal checking `by hand' */ 1128 /* do minimal checking `by hand' */
@@ -1128,7 +1130,8 @@ static inline int search_dirblock(struct buffer_head *bh,
1128 if ((char *) de + namelen <= dlimit && 1130 if ((char *) de + namelen <= dlimit &&
1129 ext4_match (namelen, name, de)) { 1131 ext4_match (namelen, name, de)) {
1130 /* found a match - just to be sure, do a full check */ 1132 /* found a match - just to be sure, do a full check */
1131 if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) 1133 if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
1134 bh->b_size, offset))
1132 return -1; 1135 return -1;
1133 *res_dir = de; 1136 *res_dir = de;
1134 return 1; 1137 return 1;
@@ -1144,6 +1147,21 @@ static inline int search_dirblock(struct buffer_head *bh,
1144 return 0; 1147 return 0;
1145} 1148}
1146 1149
1150static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1151 struct ext4_dir_entry *de)
1152{
1153 struct super_block *sb = dir->i_sb;
1154
1155 if (!is_dx(dir))
1156 return 0;
1157 if (block == 0)
1158 return 1;
1159 if (de->inode == 0 &&
1160 ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1161 sb->s_blocksize)
1162 return 1;
1163 return 0;
1164}
1147 1165
1148/* 1166/*
1149 * ext4_find_entry() 1167 * ext4_find_entry()
@@ -1158,7 +1176,8 @@ static inline int search_dirblock(struct buffer_head *bh,
1158 */ 1176 */
1159static struct buffer_head * ext4_find_entry (struct inode *dir, 1177static struct buffer_head * ext4_find_entry (struct inode *dir,
1160 const struct qstr *d_name, 1178 const struct qstr *d_name,
1161 struct ext4_dir_entry_2 ** res_dir) 1179 struct ext4_dir_entry_2 **res_dir,
1180 int *inlined)
1162{ 1181{
1163 struct super_block *sb; 1182 struct super_block *sb;
1164 struct buffer_head *bh_use[NAMEI_RA_SIZE]; 1183 struct buffer_head *bh_use[NAMEI_RA_SIZE];
@@ -1179,6 +1198,18 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1179 namelen = d_name->len; 1198 namelen = d_name->len;
1180 if (namelen > EXT4_NAME_LEN) 1199 if (namelen > EXT4_NAME_LEN)
1181 return NULL; 1200 return NULL;
1201
1202 if (ext4_has_inline_data(dir)) {
1203 int has_inline_data = 1;
1204 ret = ext4_find_inline_entry(dir, d_name, res_dir,
1205 &has_inline_data);
1206 if (has_inline_data) {
1207 if (inlined)
1208 *inlined = 1;
1209 return ret;
1210 }
1211 }
1212
1182 if ((namelen <= 2) && (name[0] == '.') && 1213 if ((namelen <= 2) && (name[0] == '.') &&
1183 (name[1] == '.' || name[1] == '\0')) { 1214 (name[1] == '.' || name[1] == '\0')) {
1184 /* 1215 /*
@@ -1244,6 +1275,8 @@ restart:
1244 goto next; 1275 goto next;
1245 } 1276 }
1246 if (!buffer_verified(bh) && 1277 if (!buffer_verified(bh) &&
1278 !is_dx_internal_node(dir, block,
1279 (struct ext4_dir_entry *)bh->b_data) &&
1247 !ext4_dirent_csum_verify(dir, 1280 !ext4_dirent_csum_verify(dir,
1248 (struct ext4_dir_entry *)bh->b_data)) { 1281 (struct ext4_dir_entry *)bh->b_data)) {
1249 EXT4_ERROR_INODE(dir, "checksumming directory " 1282 EXT4_ERROR_INODE(dir, "checksumming directory "
@@ -1361,7 +1394,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1361 if (dentry->d_name.len > EXT4_NAME_LEN) 1394 if (dentry->d_name.len > EXT4_NAME_LEN)
1362 return ERR_PTR(-ENAMETOOLONG); 1395 return ERR_PTR(-ENAMETOOLONG);
1363 1396
1364 bh = ext4_find_entry(dir, &dentry->d_name, &de); 1397 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1365 inode = NULL; 1398 inode = NULL;
1366 if (bh) { 1399 if (bh) {
1367 __u32 ino = le32_to_cpu(de->inode); 1400 __u32 ino = le32_to_cpu(de->inode);
@@ -1395,7 +1428,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1395 struct ext4_dir_entry_2 * de; 1428 struct ext4_dir_entry_2 * de;
1396 struct buffer_head *bh; 1429 struct buffer_head *bh;
1397 1430
1398 bh = ext4_find_entry(child->d_inode, &dotdot, &de); 1431 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
1399 if (!bh) 1432 if (!bh)
1400 return ERR_PTR(-ENOENT); 1433 return ERR_PTR(-ENOENT);
1401 ino = le32_to_cpu(de->inode); 1434 ino = le32_to_cpu(de->inode);
@@ -1593,6 +1626,63 @@ errout:
1593 return NULL; 1626 return NULL;
1594} 1627}
1595 1628
1629int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1630 struct buffer_head *bh,
1631 void *buf, int buf_size,
1632 const char *name, int namelen,
1633 struct ext4_dir_entry_2 **dest_de)
1634{
1635 struct ext4_dir_entry_2 *de;
1636 unsigned short reclen = EXT4_DIR_REC_LEN(namelen);
1637 int nlen, rlen;
1638 unsigned int offset = 0;
1639 char *top;
1640
1641 de = (struct ext4_dir_entry_2 *)buf;
1642 top = buf + buf_size - reclen;
1643 while ((char *) de <= top) {
1644 if (ext4_check_dir_entry(dir, NULL, de, bh,
1645 buf, buf_size, offset))
1646 return -EIO;
1647 if (ext4_match(namelen, name, de))
1648 return -EEXIST;
1649 nlen = EXT4_DIR_REC_LEN(de->name_len);
1650 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1651 if ((de->inode ? rlen - nlen : rlen) >= reclen)
1652 break;
1653 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1654 offset += rlen;
1655 }
1656 if ((char *) de > top)
1657 return -ENOSPC;
1658
1659 *dest_de = de;
1660 return 0;
1661}
1662
1663void ext4_insert_dentry(struct inode *inode,
1664 struct ext4_dir_entry_2 *de,
1665 int buf_size,
1666 const char *name, int namelen)
1667{
1668
1669 int nlen, rlen;
1670
1671 nlen = EXT4_DIR_REC_LEN(de->name_len);
1672 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1673 if (de->inode) {
1674 struct ext4_dir_entry_2 *de1 =
1675 (struct ext4_dir_entry_2 *)((char *)de + nlen);
1676 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
1677 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
1678 de = de1;
1679 }
1680 de->file_type = EXT4_FT_UNKNOWN;
1681 de->inode = cpu_to_le32(inode->i_ino);
1682 ext4_set_de_type(inode->i_sb, de, inode->i_mode);
1683 de->name_len = namelen;
1684 memcpy(de->name, name, namelen);
1685}
1596/* 1686/*
1597 * Add a new entry into a directory (leaf) block. If de is non-NULL, 1687 * Add a new entry into a directory (leaf) block. If de is non-NULL,
1598 * it points to a directory entry which is guaranteed to be large 1688 * it points to a directory entry which is guaranteed to be large
@@ -1608,12 +1698,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1608 struct inode *dir = dentry->d_parent->d_inode; 1698 struct inode *dir = dentry->d_parent->d_inode;
1609 const char *name = dentry->d_name.name; 1699 const char *name = dentry->d_name.name;
1610 int namelen = dentry->d_name.len; 1700 int namelen = dentry->d_name.len;
1611 unsigned int offset = 0;
1612 unsigned int blocksize = dir->i_sb->s_blocksize; 1701 unsigned int blocksize = dir->i_sb->s_blocksize;
1613 unsigned short reclen; 1702 unsigned short reclen;
1614 int nlen, rlen, err;
1615 char *top;
1616 int csum_size = 0; 1703 int csum_size = 0;
1704 int err;
1617 1705
1618 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1706 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1619 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1707 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -1621,22 +1709,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1621 1709
1622 reclen = EXT4_DIR_REC_LEN(namelen); 1710 reclen = EXT4_DIR_REC_LEN(namelen);
1623 if (!de) { 1711 if (!de) {
1624 de = (struct ext4_dir_entry_2 *)bh->b_data; 1712 err = ext4_find_dest_de(dir, inode,
1625 top = bh->b_data + (blocksize - csum_size) - reclen; 1713 bh, bh->b_data, blocksize - csum_size,
1626 while ((char *) de <= top) { 1714 name, namelen, &de);
1627 if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) 1715 if (err)
1628 return -EIO; 1716 return err;
1629 if (ext4_match(namelen, name, de))
1630 return -EEXIST;
1631 nlen = EXT4_DIR_REC_LEN(de->name_len);
1632 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1633 if ((de->inode? rlen - nlen: rlen) >= reclen)
1634 break;
1635 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1636 offset += rlen;
1637 }
1638 if ((char *) de > top)
1639 return -ENOSPC;
1640 } 1717 }
1641 BUFFER_TRACE(bh, "get_write_access"); 1718 BUFFER_TRACE(bh, "get_write_access");
1642 err = ext4_journal_get_write_access(handle, bh); 1719 err = ext4_journal_get_write_access(handle, bh);
@@ -1646,19 +1723,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1646 } 1723 }
1647 1724
1648 /* By now the buffer is marked for journaling */ 1725 /* By now the buffer is marked for journaling */
1649 nlen = EXT4_DIR_REC_LEN(de->name_len); 1726 ext4_insert_dentry(inode, de, blocksize, name, namelen);
1650 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); 1727
1651 if (de->inode) {
1652 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
1653 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
1654 de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
1655 de = de1;
1656 }
1657 de->file_type = EXT4_FT_UNKNOWN;
1658 de->inode = cpu_to_le32(inode->i_ino);
1659 ext4_set_de_type(dir->i_sb, de, inode->i_mode);
1660 de->name_len = namelen;
1661 memcpy(de->name, name, namelen);
1662 /* 1728 /*
1663 * XXX shouldn't update any times until successful 1729 * XXX shouldn't update any times until successful
1664 * completion of syscall, but too many callers depend 1730 * completion of syscall, but too many callers depend
@@ -1831,6 +1897,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1831 blocksize = sb->s_blocksize; 1897 blocksize = sb->s_blocksize;
1832 if (!dentry->d_name.len) 1898 if (!dentry->d_name.len)
1833 return -EINVAL; 1899 return -EINVAL;
1900
1901 if (ext4_has_inline_data(dir)) {
1902 retval = ext4_try_add_inline_entry(handle, dentry, inode);
1903 if (retval < 0)
1904 return retval;
1905 if (retval == 1) {
1906 retval = 0;
1907 return retval;
1908 }
1909 }
1910
1834 if (is_dx(dir)) { 1911 if (is_dx(dir)) {
1835 retval = ext4_dx_add_entry(handle, dentry, inode); 1912 retval = ext4_dx_add_entry(handle, dentry, inode);
1836 if (!retval || (retval != ERR_BAD_DX_DIR)) 1913 if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -2036,36 +2113,29 @@ cleanup:
2036} 2113}
2037 2114
2038/* 2115/*
2039 * ext4_delete_entry deletes a directory entry by merging it with the 2116 * ext4_generic_delete_entry deletes a directory entry by merging it
2040 * previous entry 2117 * with the previous entry
2041 */ 2118 */
2042static int ext4_delete_entry(handle_t *handle, 2119int ext4_generic_delete_entry(handle_t *handle,
2043 struct inode *dir, 2120 struct inode *dir,
2044 struct ext4_dir_entry_2 *de_del, 2121 struct ext4_dir_entry_2 *de_del,
2045 struct buffer_head *bh) 2122 struct buffer_head *bh,
2123 void *entry_buf,
2124 int buf_size,
2125 int csum_size)
2046{ 2126{
2047 struct ext4_dir_entry_2 *de, *pde; 2127 struct ext4_dir_entry_2 *de, *pde;
2048 unsigned int blocksize = dir->i_sb->s_blocksize; 2128 unsigned int blocksize = dir->i_sb->s_blocksize;
2049 int csum_size = 0; 2129 int i;
2050 int i, err;
2051
2052 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2053 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2054 csum_size = sizeof(struct ext4_dir_entry_tail);
2055 2130
2056 i = 0; 2131 i = 0;
2057 pde = NULL; 2132 pde = NULL;
2058 de = (struct ext4_dir_entry_2 *) bh->b_data; 2133 de = (struct ext4_dir_entry_2 *)entry_buf;
2059 while (i < bh->b_size - csum_size) { 2134 while (i < buf_size - csum_size) {
2060 if (ext4_check_dir_entry(dir, NULL, de, bh, i)) 2135 if (ext4_check_dir_entry(dir, NULL, de, bh,
2136 bh->b_data, bh->b_size, i))
2061 return -EIO; 2137 return -EIO;
2062 if (de == de_del) { 2138 if (de == de_del) {
2063 BUFFER_TRACE(bh, "get_write_access");
2064 err = ext4_journal_get_write_access(handle, bh);
2065 if (unlikely(err)) {
2066 ext4_std_error(dir->i_sb, err);
2067 return err;
2068 }
2069 if (pde) 2139 if (pde)
2070 pde->rec_len = ext4_rec_len_to_disk( 2140 pde->rec_len = ext4_rec_len_to_disk(
2071 ext4_rec_len_from_disk(pde->rec_len, 2141 ext4_rec_len_from_disk(pde->rec_len,
@@ -2076,12 +2146,6 @@ static int ext4_delete_entry(handle_t *handle,
2076 else 2146 else
2077 de->inode = 0; 2147 de->inode = 0;
2078 dir->i_version++; 2148 dir->i_version++;
2079 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2080 err = ext4_handle_dirty_dirent_node(handle, dir, bh);
2081 if (unlikely(err)) {
2082 ext4_std_error(dir->i_sb, err);
2083 return err;
2084 }
2085 return 0; 2149 return 0;
2086 } 2150 }
2087 i += ext4_rec_len_from_disk(de->rec_len, blocksize); 2151 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
@@ -2091,6 +2155,48 @@ static int ext4_delete_entry(handle_t *handle,
2091 return -ENOENT; 2155 return -ENOENT;
2092} 2156}
2093 2157
2158static int ext4_delete_entry(handle_t *handle,
2159 struct inode *dir,
2160 struct ext4_dir_entry_2 *de_del,
2161 struct buffer_head *bh)
2162{
2163 int err, csum_size = 0;
2164
2165 if (ext4_has_inline_data(dir)) {
2166 int has_inline_data = 1;
2167 err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2168 &has_inline_data);
2169 if (has_inline_data)
2170 return err;
2171 }
2172
2173 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2174 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2175 csum_size = sizeof(struct ext4_dir_entry_tail);
2176
2177 BUFFER_TRACE(bh, "get_write_access");
2178 err = ext4_journal_get_write_access(handle, bh);
2179 if (unlikely(err))
2180 goto out;
2181
2182 err = ext4_generic_delete_entry(handle, dir, de_del,
2183 bh, bh->b_data,
2184 dir->i_sb->s_blocksize, csum_size);
2185 if (err)
2186 goto out;
2187
2188 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2189 err = ext4_handle_dirty_dirent_node(handle, dir, bh);
2190 if (unlikely(err))
2191 goto out;
2192
2193 return 0;
2194out:
2195 if (err != -ENOENT)
2196 ext4_std_error(dir->i_sb, err);
2197 return err;
2198}
2199
2094/* 2200/*
2095 * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, 2201 * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
2096 * since this indicates that nlinks count was previously 1. 2202 * since this indicates that nlinks count was previously 1.
@@ -2211,21 +2317,95 @@ retry:
2211 return err; 2317 return err;
2212} 2318}
2213 2319
2214static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 2320struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2321 struct ext4_dir_entry_2 *de,
2322 int blocksize, int csum_size,
2323 unsigned int parent_ino, int dotdot_real_len)
2324{
2325 de->inode = cpu_to_le32(inode->i_ino);
2326 de->name_len = 1;
2327 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
2328 blocksize);
2329 strcpy(de->name, ".");
2330 ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2331
2332 de = ext4_next_entry(de, blocksize);
2333 de->inode = cpu_to_le32(parent_ino);
2334 de->name_len = 2;
2335 if (!dotdot_real_len)
2336 de->rec_len = ext4_rec_len_to_disk(blocksize -
2337 (csum_size + EXT4_DIR_REC_LEN(1)),
2338 blocksize);
2339 else
2340 de->rec_len = ext4_rec_len_to_disk(
2341 EXT4_DIR_REC_LEN(de->name_len), blocksize);
2342 strcpy(de->name, "..");
2343 ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2344
2345 return ext4_next_entry(de, blocksize);
2346}
2347
2348static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2349 struct inode *inode)
2215{ 2350{
2216 handle_t *handle;
2217 struct inode *inode;
2218 struct buffer_head *dir_block = NULL; 2351 struct buffer_head *dir_block = NULL;
2219 struct ext4_dir_entry_2 *de; 2352 struct ext4_dir_entry_2 *de;
2220 struct ext4_dir_entry_tail *t; 2353 struct ext4_dir_entry_tail *t;
2221 unsigned int blocksize = dir->i_sb->s_blocksize; 2354 unsigned int blocksize = dir->i_sb->s_blocksize;
2222 int csum_size = 0; 2355 int csum_size = 0;
2223 int err, retries = 0; 2356 int err;
2224 2357
2225 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 2358 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2226 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 2359 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2227 csum_size = sizeof(struct ext4_dir_entry_tail); 2360 csum_size = sizeof(struct ext4_dir_entry_tail);
2228 2361
2362 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2363 err = ext4_try_create_inline_dir(handle, dir, inode);
2364 if (err < 0 && err != -ENOSPC)
2365 goto out;
2366 if (!err)
2367 goto out;
2368 }
2369
2370 inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
2371 dir_block = ext4_bread(handle, inode, 0, 1, &err);
2372 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
2373 if (!err) {
2374 err = -EIO;
2375 ext4_error(inode->i_sb,
2376 "Directory hole detected on inode %lu\n",
2377 inode->i_ino);
2378 }
2379 goto out;
2380 }
2381 BUFFER_TRACE(dir_block, "get_write_access");
2382 err = ext4_journal_get_write_access(handle, dir_block);
2383 if (err)
2384 goto out;
2385 de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2386 ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2387 set_nlink(inode, 2);
2388 if (csum_size) {
2389 t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
2390 initialize_dirent_tail(t, blocksize);
2391 }
2392
2393 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2394 err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
2395 if (err)
2396 goto out;
2397 set_buffer_verified(dir_block);
2398out:
2399 brelse(dir_block);
2400 return err;
2401}
2402
2403static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2404{
2405 handle_t *handle;
2406 struct inode *inode;
2407 int err, retries = 0;
2408
2229 if (EXT4_DIR_LINK_MAX(dir)) 2409 if (EXT4_DIR_LINK_MAX(dir))
2230 return -EMLINK; 2410 return -EMLINK;
2231 2411
@@ -2249,47 +2429,9 @@ retry:
2249 2429
2250 inode->i_op = &ext4_dir_inode_operations; 2430 inode->i_op = &ext4_dir_inode_operations;
2251 inode->i_fop = &ext4_dir_operations; 2431 inode->i_fop = &ext4_dir_operations;
2252 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 2432 err = ext4_init_new_dir(handle, dir, inode);
2253 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
2254 if (!err) {
2255 err = -EIO;
2256 ext4_error(inode->i_sb,
2257 "Directory hole detected on inode %lu\n",
2258 inode->i_ino);
2259 }
2260 goto out_clear_inode;
2261 }
2262 BUFFER_TRACE(dir_block, "get_write_access");
2263 err = ext4_journal_get_write_access(handle, dir_block);
2264 if (err)
2265 goto out_clear_inode;
2266 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
2267 de->inode = cpu_to_le32(inode->i_ino);
2268 de->name_len = 1;
2269 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
2270 blocksize);
2271 strcpy(de->name, ".");
2272 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
2273 de = ext4_next_entry(de, blocksize);
2274 de->inode = cpu_to_le32(dir->i_ino);
2275 de->rec_len = ext4_rec_len_to_disk(blocksize -
2276 (csum_size + EXT4_DIR_REC_LEN(1)),
2277 blocksize);
2278 de->name_len = 2;
2279 strcpy(de->name, "..");
2280 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
2281 set_nlink(inode, 2);
2282
2283 if (csum_size) {
2284 t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
2285 initialize_dirent_tail(t, blocksize);
2286 }
2287
2288 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2289 err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
2290 if (err) 2433 if (err)
2291 goto out_clear_inode; 2434 goto out_clear_inode;
2292 set_buffer_verified(dir_block);
2293 err = ext4_mark_inode_dirty(handle, inode); 2435 err = ext4_mark_inode_dirty(handle, inode);
2294 if (!err) 2436 if (!err)
2295 err = ext4_add_entry(handle, dentry, inode); 2437 err = ext4_add_entry(handle, dentry, inode);
@@ -2309,7 +2451,6 @@ out_clear_inode:
2309 unlock_new_inode(inode); 2451 unlock_new_inode(inode);
2310 d_instantiate(dentry, inode); 2452 d_instantiate(dentry, inode);
2311out_stop: 2453out_stop:
2312 brelse(dir_block);
2313 ext4_journal_stop(handle); 2454 ext4_journal_stop(handle);
2314 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2455 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2315 goto retry; 2456 goto retry;
@@ -2327,6 +2468,14 @@ static int empty_dir(struct inode *inode)
2327 struct super_block *sb; 2468 struct super_block *sb;
2328 int err = 0; 2469 int err = 0;
2329 2470
2471 if (ext4_has_inline_data(inode)) {
2472 int has_inline_data = 1;
2473
2474 err = empty_inline_dir(inode, &has_inline_data);
2475 if (has_inline_data)
2476 return err;
2477 }
2478
2330 sb = inode->i_sb; 2479 sb = inode->i_sb;
2331 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 2480 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
2332 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 2481 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
@@ -2393,7 +2542,8 @@ static int empty_dir(struct inode *inode)
2393 set_buffer_verified(bh); 2542 set_buffer_verified(bh);
2394 de = (struct ext4_dir_entry_2 *) bh->b_data; 2543 de = (struct ext4_dir_entry_2 *) bh->b_data;
2395 } 2544 }
2396 if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { 2545 if (ext4_check_dir_entry(inode, NULL, de, bh,
2546 bh->b_data, bh->b_size, offset)) {
2397 de = (struct ext4_dir_entry_2 *)(bh->b_data + 2547 de = (struct ext4_dir_entry_2 *)(bh->b_data +
2398 sb->s_blocksize); 2548 sb->s_blocksize);
2399 offset = (offset | (sb->s_blocksize - 1)) + 1; 2549 offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2579,7 +2729,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2579 return PTR_ERR(handle); 2729 return PTR_ERR(handle);
2580 2730
2581 retval = -ENOENT; 2731 retval = -ENOENT;
2582 bh = ext4_find_entry(dir, &dentry->d_name, &de); 2732 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2583 if (!bh) 2733 if (!bh)
2584 goto end_rmdir; 2734 goto end_rmdir;
2585 2735
@@ -2644,7 +2794,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2644 ext4_handle_sync(handle); 2794 ext4_handle_sync(handle);
2645 2795
2646 retval = -ENOENT; 2796 retval = -ENOENT;
2647 bh = ext4_find_entry(dir, &dentry->d_name, &de); 2797 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2648 if (!bh) 2798 if (!bh)
2649 goto end_unlink; 2799 goto end_unlink;
2650 2800
@@ -2826,8 +2976,39 @@ retry:
2826 return err; 2976 return err;
2827} 2977}
2828 2978
2829#define PARENT_INO(buffer, size) \ 2979
2830 (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode) 2980/*
2981 * Try to find buffer head where contains the parent block.
2982 * It should be the inode block if it is inlined or the 1st block
2983 * if it is a normal dir.
2984 */
2985static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
2986 struct inode *inode,
2987 int *retval,
2988 struct ext4_dir_entry_2 **parent_de,
2989 int *inlined)
2990{
2991 struct buffer_head *bh;
2992
2993 if (!ext4_has_inline_data(inode)) {
2994 if (!(bh = ext4_bread(handle, inode, 0, 0, retval))) {
2995 if (!*retval) {
2996 *retval = -EIO;
2997 ext4_error(inode->i_sb,
2998 "Directory hole detected on inode %lu\n",
2999 inode->i_ino);
3000 }
3001 return NULL;
3002 }
3003 *parent_de = ext4_next_entry(
3004 (struct ext4_dir_entry_2 *)bh->b_data,
3005 inode->i_sb->s_blocksize);
3006 return bh;
3007 }
3008
3009 *inlined = 1;
3010 return ext4_get_first_inline_block(inode, parent_de, retval);
3011}
2831 3012
2832/* 3013/*
2833 * Anybody can rename anything with this: the permission checks are left to the 3014 * Anybody can rename anything with this: the permission checks are left to the
@@ -2841,6 +3022,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2841 struct buffer_head *old_bh, *new_bh, *dir_bh; 3022 struct buffer_head *old_bh, *new_bh, *dir_bh;
2842 struct ext4_dir_entry_2 *old_de, *new_de; 3023 struct ext4_dir_entry_2 *old_de, *new_de;
2843 int retval, force_da_alloc = 0; 3024 int retval, force_da_alloc = 0;
3025 int inlined = 0, new_inlined = 0;
3026 struct ext4_dir_entry_2 *parent_de;
2844 3027
2845 dquot_initialize(old_dir); 3028 dquot_initialize(old_dir);
2846 dquot_initialize(new_dir); 3029 dquot_initialize(new_dir);
@@ -2860,7 +3043,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2860 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 3043 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2861 ext4_handle_sync(handle); 3044 ext4_handle_sync(handle);
2862 3045
2863 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); 3046 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
2864 /* 3047 /*
2865 * Check for inode number is _not_ due to possible IO errors. 3048 * Check for inode number is _not_ due to possible IO errors.
2866 * We might rmdir the source, keep it as pwd of some process 3049 * We might rmdir the source, keep it as pwd of some process
@@ -2873,7 +3056,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2873 goto end_rename; 3056 goto end_rename;
2874 3057
2875 new_inode = new_dentry->d_inode; 3058 new_inode = new_dentry->d_inode;
2876 new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); 3059 new_bh = ext4_find_entry(new_dir, &new_dentry->d_name,
3060 &new_de, &new_inlined);
2877 if (new_bh) { 3061 if (new_bh) {
2878 if (!new_inode) { 3062 if (!new_inode) {
2879 brelse(new_bh); 3063 brelse(new_bh);
@@ -2887,22 +3071,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2887 goto end_rename; 3071 goto end_rename;
2888 } 3072 }
2889 retval = -EIO; 3073 retval = -EIO;
2890 if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) { 3074 dir_bh = ext4_get_first_dir_block(handle, old_inode,
2891 if (!retval) { 3075 &retval, &parent_de,
2892 retval = -EIO; 3076 &inlined);
2893 ext4_error(old_inode->i_sb, 3077 if (!dir_bh)
2894 "Directory hole detected on inode %lu\n",
2895 old_inode->i_ino);
2896 }
2897 goto end_rename; 3078 goto end_rename;
2898 } 3079 if (!inlined && !buffer_verified(dir_bh) &&
2899 if (!buffer_verified(dir_bh) &&
2900 !ext4_dirent_csum_verify(old_inode, 3080 !ext4_dirent_csum_verify(old_inode,
2901 (struct ext4_dir_entry *)dir_bh->b_data)) 3081 (struct ext4_dir_entry *)dir_bh->b_data))
2902 goto end_rename; 3082 goto end_rename;
2903 set_buffer_verified(dir_bh); 3083 set_buffer_verified(dir_bh);
2904 if (le32_to_cpu(PARENT_INO(dir_bh->b_data, 3084 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
2905 old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
2906 goto end_rename; 3085 goto end_rename;
2907 retval = -EMLINK; 3086 retval = -EMLINK;
2908 if (!new_inode && new_dir != old_dir && 3087 if (!new_inode && new_dir != old_dir &&
@@ -2931,10 +3110,13 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2931 ext4_current_time(new_dir); 3110 ext4_current_time(new_dir);
2932 ext4_mark_inode_dirty(handle, new_dir); 3111 ext4_mark_inode_dirty(handle, new_dir);
2933 BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); 3112 BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
2934 retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh); 3113 if (!new_inlined) {
2935 if (unlikely(retval)) { 3114 retval = ext4_handle_dirty_dirent_node(handle,
2936 ext4_std_error(new_dir->i_sb, retval); 3115 new_dir, new_bh);
2937 goto end_rename; 3116 if (unlikely(retval)) {
3117 ext4_std_error(new_dir->i_sb, retval);
3118 goto end_rename;
3119 }
2938 } 3120 }
2939 brelse(new_bh); 3121 brelse(new_bh);
2940 new_bh = NULL; 3122 new_bh = NULL;
@@ -2962,7 +3144,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2962 struct buffer_head *old_bh2; 3144 struct buffer_head *old_bh2;
2963 struct ext4_dir_entry_2 *old_de2; 3145 struct ext4_dir_entry_2 *old_de2;
2964 3146
2965 old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); 3147 old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name,
3148 &old_de2, NULL);
2966 if (old_bh2) { 3149 if (old_bh2) {
2967 retval = ext4_delete_entry(handle, old_dir, 3150 retval = ext4_delete_entry(handle, old_dir,
2968 old_de2, old_bh2); 3151 old_de2, old_bh2);
@@ -2982,17 +3165,19 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2982 old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); 3165 old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
2983 ext4_update_dx_flag(old_dir); 3166 ext4_update_dx_flag(old_dir);
2984 if (dir_bh) { 3167 if (dir_bh) {
2985 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 3168 parent_de->inode = cpu_to_le32(new_dir->i_ino);
2986 cpu_to_le32(new_dir->i_ino);
2987 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 3169 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2988 if (is_dx(old_inode)) { 3170 if (!inlined) {
2989 retval = ext4_handle_dirty_dx_node(handle, 3171 if (is_dx(old_inode)) {
2990 old_inode, 3172 retval = ext4_handle_dirty_dx_node(handle,
2991 dir_bh); 3173 old_inode,
3174 dir_bh);
3175 } else {
3176 retval = ext4_handle_dirty_dirent_node(handle,
3177 old_inode, dir_bh);
3178 }
2992 } else { 3179 } else {
2993 retval = ext4_handle_dirty_dirent_node(handle, 3180 retval = ext4_mark_inode_dirty(handle, old_inode);
2994 old_inode,
2995 dir_bh);
2996 } 3181 }
2997 if (retval) { 3182 if (retval) {
2998 ext4_std_error(old_dir->i_sb, retval); 3183 ext4_std_error(old_dir->i_sb, retval);
@@ -3043,23 +3228,19 @@ const struct inode_operations ext4_dir_inode_operations = {
3043 .mknod = ext4_mknod, 3228 .mknod = ext4_mknod,
3044 .rename = ext4_rename, 3229 .rename = ext4_rename,
3045 .setattr = ext4_setattr, 3230 .setattr = ext4_setattr,
3046#ifdef CONFIG_EXT4_FS_XATTR
3047 .setxattr = generic_setxattr, 3231 .setxattr = generic_setxattr,
3048 .getxattr = generic_getxattr, 3232 .getxattr = generic_getxattr,
3049 .listxattr = ext4_listxattr, 3233 .listxattr = ext4_listxattr,
3050 .removexattr = generic_removexattr, 3234 .removexattr = generic_removexattr,
3051#endif
3052 .get_acl = ext4_get_acl, 3235 .get_acl = ext4_get_acl,
3053 .fiemap = ext4_fiemap, 3236 .fiemap = ext4_fiemap,
3054}; 3237};
3055 3238
3056const struct inode_operations ext4_special_inode_operations = { 3239const struct inode_operations ext4_special_inode_operations = {
3057 .setattr = ext4_setattr, 3240 .setattr = ext4_setattr,
3058#ifdef CONFIG_EXT4_FS_XATTR
3059 .setxattr = generic_setxattr, 3241 .setxattr = generic_setxattr,
3060 .getxattr = generic_getxattr, 3242 .getxattr = generic_getxattr,
3061 .listxattr = ext4_listxattr, 3243 .listxattr = ext4_listxattr,
3062 .removexattr = generic_removexattr, 3244 .removexattr = generic_removexattr,
3063#endif
3064 .get_acl = ext4_get_acl, 3245 .get_acl = ext4_get_acl,
3065}; 3246};
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 68e896e12a67..0016fbca2a40 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -27,7 +27,6 @@
27#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
28#include "xattr.h" 28#include "xattr.h"
29#include "acl.h" 29#include "acl.h"
30#include "ext4_extents.h"
31 30
32static struct kmem_cache *io_page_cachep, *io_end_cachep; 31static struct kmem_cache *io_page_cachep, *io_end_cachep;
33 32
@@ -111,7 +110,7 @@ static int ext4_end_io(ext4_io_end_t *io)
111 inode_dio_done(inode); 110 inode_dio_done(inode);
112 /* Wake up anyone waiting on unwritten extent conversion */ 111 /* Wake up anyone waiting on unwritten extent conversion */
113 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 112 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
114 wake_up_all(ext4_ioend_wq(io->inode)); 113 wake_up_all(ext4_ioend_wq(inode));
115 return ret; 114 return ret;
116} 115}
117 116
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 47bf06a2765d..d99387b89edd 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -783,7 +783,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
783 783
784 err = ext4_journal_get_write_access(handle, gdb_bh); 784 err = ext4_journal_get_write_access(handle, gdb_bh);
785 if (unlikely(err)) 785 if (unlikely(err))
786 goto exit_sbh; 786 goto exit_dind;
787 787
788 err = ext4_journal_get_write_access(handle, dind); 788 err = ext4_journal_get_write_access(handle, dind);
789 if (unlikely(err)) 789 if (unlikely(err))
@@ -792,7 +792,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
792 /* ext4_reserve_inode_write() gets a reference on the iloc */ 792 /* ext4_reserve_inode_write() gets a reference on the iloc */
793 err = ext4_reserve_inode_write(handle, inode, &iloc); 793 err = ext4_reserve_inode_write(handle, inode, &iloc);
794 if (unlikely(err)) 794 if (unlikely(err))
795 goto exit_dindj; 795 goto exit_dind;
796 796
797 n_group_desc = ext4_kvmalloc((gdb_num + 1) * 797 n_group_desc = ext4_kvmalloc((gdb_num + 1) *
798 sizeof(struct buffer_head *), 798 sizeof(struct buffer_head *),
@@ -846,12 +846,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
846 846
847exit_inode: 847exit_inode:
848 ext4_kvfree(n_group_desc); 848 ext4_kvfree(n_group_desc);
849 /* ext4_handle_release_buffer(handle, iloc.bh); */
850 brelse(iloc.bh); 849 brelse(iloc.bh);
851exit_dindj:
852 /* ext4_handle_release_buffer(handle, dind); */
853exit_sbh:
854 /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
855exit_dind: 850exit_dind:
856 brelse(dind); 851 brelse(dind);
857exit_bh: 852exit_bh:
@@ -969,14 +964,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
969 } 964 }
970 965
971 for (i = 0; i < reserved_gdb; i++) { 966 for (i = 0; i < reserved_gdb; i++) {
972 if ((err = ext4_journal_get_write_access(handle, primary[i]))) { 967 if ((err = ext4_journal_get_write_access(handle, primary[i])))
973 /*
974 int j;
975 for (j = 0; j < i; j++)
976 ext4_handle_release_buffer(handle, primary[j]);
977 */
978 goto exit_bh; 968 goto exit_bh;
979 }
980 } 969 }
981 970
982 if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) 971 if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 80928f716850..3cdb0a2fc648 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,7 +45,7 @@
45#include <linux/freezer.h> 45#include <linux/freezer.h>
46 46
47#include "ext4.h" 47#include "ext4.h"
48#include "ext4_extents.h" 48#include "ext4_extents.h" /* Needed for trace points definition */
49#include "ext4_jbd2.h" 49#include "ext4_jbd2.h"
50#include "xattr.h" 50#include "xattr.h"
51#include "acl.h" 51#include "acl.h"
@@ -939,10 +939,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
939 return NULL; 939 return NULL;
940 940
941 ei->vfs_inode.i_version = 1; 941 ei->vfs_inode.i_version = 1;
942 ei->vfs_inode.i_data.writeback_index = 0;
943 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 942 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
944 INIT_LIST_HEAD(&ei->i_prealloc_list); 943 INIT_LIST_HEAD(&ei->i_prealloc_list);
945 spin_lock_init(&ei->i_prealloc_lock); 944 spin_lock_init(&ei->i_prealloc_lock);
945 ext4_es_init_tree(&ei->i_es_tree);
946 rwlock_init(&ei->i_es_lock);
946 ei->i_reserved_data_blocks = 0; 947 ei->i_reserved_data_blocks = 0;
947 ei->i_reserved_meta_blocks = 0; 948 ei->i_reserved_meta_blocks = 0;
948 ei->i_allocated_meta_blocks = 0; 949 ei->i_allocated_meta_blocks = 0;
@@ -996,9 +997,7 @@ static void init_once(void *foo)
996 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 997 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
997 998
998 INIT_LIST_HEAD(&ei->i_orphan); 999 INIT_LIST_HEAD(&ei->i_orphan);
999#ifdef CONFIG_EXT4_FS_XATTR
1000 init_rwsem(&ei->xattr_sem); 1000 init_rwsem(&ei->xattr_sem);
1001#endif
1002 init_rwsem(&ei->i_data_sem); 1001 init_rwsem(&ei->i_data_sem);
1003 inode_init_once(&ei->vfs_inode); 1002 inode_init_once(&ei->vfs_inode);
1004} 1003}
@@ -1031,6 +1030,7 @@ void ext4_clear_inode(struct inode *inode)
1031 clear_inode(inode); 1030 clear_inode(inode);
1032 dquot_drop(inode); 1031 dquot_drop(inode);
1033 ext4_discard_preallocations(inode); 1032 ext4_discard_preallocations(inode);
1033 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1034 if (EXT4_I(inode)->jinode) { 1034 if (EXT4_I(inode)->jinode) {
1035 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 1035 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1036 EXT4_I(inode)->jinode); 1036 EXT4_I(inode)->jinode);
@@ -1447,13 +1447,8 @@ static const struct mount_opts {
1447 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, 1447 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
1448 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, 1448 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
1449 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, 1449 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
1450#ifdef CONFIG_EXT4_FS_XATTR
1451 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1450 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1452 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1451 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1453#else
1454 {Opt_user_xattr, 0, MOPT_NOSUPPORT},
1455 {Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
1456#endif
1457#ifdef CONFIG_EXT4_FS_POSIX_ACL 1452#ifdef CONFIG_EXT4_FS_POSIX_ACL
1458 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, 1453 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1459 {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, 1454 {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
@@ -3202,7 +3197,6 @@ int ext4_calculate_overhead(struct super_block *sb)
3202 ext4_fsblk_t overhead = 0; 3197 ext4_fsblk_t overhead = 0;
3203 char *buf = (char *) get_zeroed_page(GFP_KERNEL); 3198 char *buf = (char *) get_zeroed_page(GFP_KERNEL);
3204 3199
3205 memset(buf, 0, PAGE_SIZE);
3206 if (!buf) 3200 if (!buf)
3207 return -ENOMEM; 3201 return -ENOMEM;
3208 3202
@@ -3256,7 +3250,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3256 unsigned int i; 3250 unsigned int i;
3257 int needs_recovery, has_huge_files, has_bigalloc; 3251 int needs_recovery, has_huge_files, has_bigalloc;
3258 __u64 blocks_count; 3252 __u64 blocks_count;
3259 int err; 3253 int err = 0;
3260 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3254 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3261 ext4_group_t first_not_zeroed; 3255 ext4_group_t first_not_zeroed;
3262 3256
@@ -3272,9 +3266,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3272 } 3266 }
3273 sb->s_fs_info = sbi; 3267 sb->s_fs_info = sbi;
3274 sbi->s_sb = sb; 3268 sbi->s_sb = sb;
3275 sbi->s_mount_opt = 0;
3276 sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID);
3277 sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID);
3278 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 3269 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3279 sbi->s_sb_block = sb_block; 3270 sbi->s_sb_block = sb_block;
3280 if (sb->s_bdev->bd_part) 3271 if (sb->s_bdev->bd_part)
@@ -3285,6 +3276,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3285 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3276 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3286 *cp = '!'; 3277 *cp = '!';
3287 3278
3279 /* -EINVAL is default */
3288 ret = -EINVAL; 3280 ret = -EINVAL;
3289 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 3281 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3290 if (!blocksize) { 3282 if (!blocksize) {
@@ -3369,9 +3361,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3369 if (def_mount_opts & EXT4_DEFM_UID16) 3361 if (def_mount_opts & EXT4_DEFM_UID16)
3370 set_opt(sb, NO_UID32); 3362 set_opt(sb, NO_UID32);
3371 /* xattr user namespace & acls are now defaulted on */ 3363 /* xattr user namespace & acls are now defaulted on */
3372#ifdef CONFIG_EXT4_FS_XATTR
3373 set_opt(sb, XATTR_USER); 3364 set_opt(sb, XATTR_USER);
3374#endif
3375#ifdef CONFIG_EXT4_FS_POSIX_ACL 3365#ifdef CONFIG_EXT4_FS_POSIX_ACL
3376 set_opt(sb, POSIX_ACL); 3366 set_opt(sb, POSIX_ACL);
3377#endif 3367#endif
@@ -3662,7 +3652,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3662 " too large to mount safely on this system"); 3652 " too large to mount safely on this system");
3663 if (sizeof(sector_t) < 8) 3653 if (sizeof(sector_t) < 8)
3664 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3654 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3665 ret = err;
3666 goto failed_mount; 3655 goto failed_mount;
3667 } 3656 }
3668 3657
@@ -3770,7 +3759,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3770 } 3759 }
3771 if (err) { 3760 if (err) {
3772 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3761 ext4_msg(sb, KERN_ERR, "insufficient memory");
3773 ret = err;
3774 goto failed_mount3; 3762 goto failed_mount3;
3775 } 3763 }
3776 3764
@@ -3801,7 +3789,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3801 3789
3802 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3790 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3803 mutex_init(&sbi->s_orphan_lock); 3791 mutex_init(&sbi->s_orphan_lock);
3804 sbi->s_resize_flags = 0;
3805 3792
3806 sb->s_root = NULL; 3793 sb->s_root = NULL;
3807 3794
@@ -3897,8 +3884,8 @@ no_journal:
3897 if (es->s_overhead_clusters) 3884 if (es->s_overhead_clusters)
3898 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); 3885 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
3899 else { 3886 else {
3900 ret = ext4_calculate_overhead(sb); 3887 err = ext4_calculate_overhead(sb);
3901 if (ret) 3888 if (err)
3902 goto failed_mount_wq; 3889 goto failed_mount_wq;
3903 } 3890 }
3904 3891
@@ -3910,6 +3897,7 @@ no_journal:
3910 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 3897 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3911 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3898 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3912 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3899 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3900 ret = -ENOMEM;
3913 goto failed_mount_wq; 3901 goto failed_mount_wq;
3914 } 3902 }
3915 3903
@@ -4012,12 +4000,20 @@ no_journal:
4012 /* Enable quota usage during mount. */ 4000 /* Enable quota usage during mount. */
4013 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 4001 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
4014 !(sb->s_flags & MS_RDONLY)) { 4002 !(sb->s_flags & MS_RDONLY)) {
4015 ret = ext4_enable_quotas(sb); 4003 err = ext4_enable_quotas(sb);
4016 if (ret) 4004 if (err)
4017 goto failed_mount7; 4005 goto failed_mount7;
4018 } 4006 }
4019#endif /* CONFIG_QUOTA */ 4007#endif /* CONFIG_QUOTA */
4020 4008
4009 if (test_opt(sb, DISCARD)) {
4010 struct request_queue *q = bdev_get_queue(sb->s_bdev);
4011 if (!blk_queue_discard(q))
4012 ext4_msg(sb, KERN_WARNING,
4013 "mounting with \"discard\" option, but "
4014 "the device does not support discard");
4015 }
4016
4021 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 4017 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4022 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 4018 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
4023 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 4019 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
@@ -4084,7 +4080,7 @@ out_fail:
4084 kfree(sbi); 4080 kfree(sbi);
4085out_free_orig: 4081out_free_orig:
4086 kfree(orig_data); 4082 kfree(orig_data);
4087 return ret; 4083 return err ? err : ret;
4088} 4084}
4089 4085
4090/* 4086/*
@@ -4790,7 +4786,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4790 4786
4791 buf->f_type = EXT4_SUPER_MAGIC; 4787 buf->f_type = EXT4_SUPER_MAGIC;
4792 buf->f_bsize = sb->s_blocksize; 4788 buf->f_bsize = sb->s_blocksize;
4793 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead); 4789 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
4794 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - 4790 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
4795 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); 4791 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
4796 /* prevent underflow in case that few free space is available */ 4792 /* prevent underflow in case that few free space is available */
@@ -5282,6 +5278,7 @@ static int __init ext4_init_fs(void)
5282 ext4_li_info = NULL; 5278 ext4_li_info = NULL;
5283 mutex_init(&ext4_li_mtx); 5279 mutex_init(&ext4_li_mtx);
5284 5280
5281 /* Build-time check for flags consistency */
5285 ext4_check_flag_values(); 5282 ext4_check_flag_values();
5286 5283
5287 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { 5284 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5289,9 +5286,14 @@ static int __init ext4_init_fs(void)
5289 init_waitqueue_head(&ext4__ioend_wq[i]); 5286 init_waitqueue_head(&ext4__ioend_wq[i]);
5290 } 5287 }
5291 5288
5292 err = ext4_init_pageio(); 5289 err = ext4_init_es();
5293 if (err) 5290 if (err)
5294 return err; 5291 return err;
5292
5293 err = ext4_init_pageio();
5294 if (err)
5295 goto out7;
5296
5295 err = ext4_init_system_zone(); 5297 err = ext4_init_system_zone();
5296 if (err) 5298 if (err)
5297 goto out6; 5299 goto out6;
@@ -5341,6 +5343,9 @@ out5:
5341 ext4_exit_system_zone(); 5343 ext4_exit_system_zone();
5342out6: 5344out6:
5343 ext4_exit_pageio(); 5345 ext4_exit_pageio();
5346out7:
5347 ext4_exit_es();
5348
5344 return err; 5349 return err;
5345} 5350}
5346 5351
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index ed9354aff279..ff3711932018 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -35,22 +35,18 @@ const struct inode_operations ext4_symlink_inode_operations = {
35 .follow_link = page_follow_link_light, 35 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 36 .put_link = page_put_link,
37 .setattr = ext4_setattr, 37 .setattr = ext4_setattr,
38#ifdef CONFIG_EXT4_FS_XATTR
39 .setxattr = generic_setxattr, 38 .setxattr = generic_setxattr,
40 .getxattr = generic_getxattr, 39 .getxattr = generic_getxattr,
41 .listxattr = ext4_listxattr, 40 .listxattr = ext4_listxattr,
42 .removexattr = generic_removexattr, 41 .removexattr = generic_removexattr,
43#endif
44}; 42};
45 43
46const struct inode_operations ext4_fast_symlink_inode_operations = { 44const struct inode_operations ext4_fast_symlink_inode_operations = {
47 .readlink = generic_readlink, 45 .readlink = generic_readlink,
48 .follow_link = ext4_follow_link, 46 .follow_link = ext4_follow_link,
49 .setattr = ext4_setattr, 47 .setattr = ext4_setattr,
50#ifdef CONFIG_EXT4_FS_XATTR
51 .setxattr = generic_setxattr, 48 .setxattr = generic_setxattr,
52 .getxattr = generic_getxattr, 49 .getxattr = generic_getxattr,
53 .listxattr = ext4_listxattr, 50 .listxattr = ext4_listxattr,
54 .removexattr = generic_removexattr, 51 .removexattr = generic_removexattr,
55#endif
56}; 52};
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2cdb98d62980..3a91ebc2b66f 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -61,11 +61,6 @@
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
64#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
65#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
66#define BFIRST(bh) ENTRY(BHDR(bh)+1)
67#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
68
69#ifdef EXT4_XATTR_DEBUG 64#ifdef EXT4_XATTR_DEBUG
70# define ea_idebug(inode, f...) do { \ 65# define ea_idebug(inode, f...) do { \
71 printk(KERN_DEBUG "inode %s:%lu: ", \ 66 printk(KERN_DEBUG "inode %s:%lu: ", \
@@ -312,7 +307,7 @@ cleanup:
312 return error; 307 return error;
313} 308}
314 309
315static int 310int
316ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, 311ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
317 void *buffer, size_t buffer_size) 312 void *buffer, size_t buffer_size)
318{ 313{
@@ -581,21 +576,6 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
581 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 576 return (*min_offs - ((void *)last - base) - sizeof(__u32));
582} 577}
583 578
584struct ext4_xattr_info {
585 int name_index;
586 const char *name;
587 const void *value;
588 size_t value_len;
589};
590
591struct ext4_xattr_search {
592 struct ext4_xattr_entry *first;
593 void *base;
594 void *end;
595 struct ext4_xattr_entry *here;
596 int not_found;
597};
598
599static int 579static int
600ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) 580ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
601{ 581{
@@ -648,9 +628,14 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
648 size. Just replace. */ 628 size. Just replace. */
649 s->here->e_value_size = 629 s->here->e_value_size =
650 cpu_to_le32(i->value_len); 630 cpu_to_le32(i->value_len);
651 memset(val + size - EXT4_XATTR_PAD, 0, 631 if (i->value == EXT4_ZERO_XATTR_VALUE) {
652 EXT4_XATTR_PAD); /* Clear pad bytes. */ 632 memset(val, 0, size);
653 memcpy(val, i->value, i->value_len); 633 } else {
634 /* Clear pad bytes first. */
635 memset(val + size - EXT4_XATTR_PAD, 0,
636 EXT4_XATTR_PAD);
637 memcpy(val, i->value, i->value_len);
638 }
654 return 0; 639 return 0;
655 } 640 }
656 641
@@ -689,9 +674,14 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
689 size_t size = EXT4_XATTR_SIZE(i->value_len); 674 size_t size = EXT4_XATTR_SIZE(i->value_len);
690 void *val = s->base + min_offs - size; 675 void *val = s->base + min_offs - size;
691 s->here->e_value_offs = cpu_to_le16(min_offs - size); 676 s->here->e_value_offs = cpu_to_le16(min_offs - size);
692 memset(val + size - EXT4_XATTR_PAD, 0, 677 if (i->value == EXT4_ZERO_XATTR_VALUE) {
693 EXT4_XATTR_PAD); /* Clear the pad bytes. */ 678 memset(val, 0, size);
694 memcpy(val, i->value, i->value_len); 679 } else {
680 /* Clear the pad bytes first. */
681 memset(val + size - EXT4_XATTR_PAD, 0,
682 EXT4_XATTR_PAD);
683 memcpy(val, i->value, i->value_len);
684 }
695 } 685 }
696 } 686 }
697 return 0; 687 return 0;
@@ -794,7 +784,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
794 int offset = (char *)s->here - bs->bh->b_data; 784 int offset = (char *)s->here - bs->bh->b_data;
795 785
796 unlock_buffer(bs->bh); 786 unlock_buffer(bs->bh);
797 ext4_handle_release_buffer(handle, bs->bh);
798 if (ce) { 787 if (ce) {
799 mb_cache_entry_release(ce); 788 mb_cache_entry_release(ce);
800 ce = NULL; 789 ce = NULL;
@@ -950,14 +939,8 @@ bad_block:
950#undef header 939#undef header
951} 940}
952 941
953struct ext4_xattr_ibody_find { 942int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
954 struct ext4_xattr_search s; 943 struct ext4_xattr_ibody_find *is)
955 struct ext4_iloc iloc;
956};
957
958static int
959ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
960 struct ext4_xattr_ibody_find *is)
961{ 944{
962 struct ext4_xattr_ibody_header *header; 945 struct ext4_xattr_ibody_header *header;
963 struct ext4_inode *raw_inode; 946 struct ext4_inode *raw_inode;
@@ -985,10 +968,47 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
985 return 0; 968 return 0;
986} 969}
987 970
988static int 971int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
989ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, 972 struct ext4_xattr_info *i,
990 struct ext4_xattr_info *i, 973 struct ext4_xattr_ibody_find *is)
991 struct ext4_xattr_ibody_find *is) 974{
975 struct ext4_xattr_ibody_header *header;
976 struct ext4_xattr_search *s = &is->s;
977 int error;
978
979 if (EXT4_I(inode)->i_extra_isize == 0)
980 return -ENOSPC;
981 error = ext4_xattr_set_entry(i, s);
982 if (error) {
983 if (error == -ENOSPC &&
984 ext4_has_inline_data(inode)) {
985 error = ext4_try_to_evict_inline_data(handle, inode,
986 EXT4_XATTR_LEN(strlen(i->name) +
987 EXT4_XATTR_SIZE(i->value_len)));
988 if (error)
989 return error;
990 error = ext4_xattr_ibody_find(inode, i, is);
991 if (error)
992 return error;
993 error = ext4_xattr_set_entry(i, s);
994 }
995 if (error)
996 return error;
997 }
998 header = IHDR(inode, ext4_raw_inode(&is->iloc));
999 if (!IS_LAST_ENTRY(s->first)) {
1000 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1001 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
1002 } else {
1003 header->h_magic = cpu_to_le32(0);
1004 ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
1005 }
1006 return 0;
1007}
1008
1009static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
1010 struct ext4_xattr_info *i,
1011 struct ext4_xattr_ibody_find *is)
992{ 1012{
993 struct ext4_xattr_ibody_header *header; 1013 struct ext4_xattr_ibody_header *header;
994 struct ext4_xattr_search *s = &is->s; 1014 struct ext4_xattr_search *s = &is->s;
@@ -1144,9 +1164,17 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1144{ 1164{
1145 handle_t *handle; 1165 handle_t *handle;
1146 int error, retries = 0; 1166 int error, retries = 0;
1167 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
1147 1168
1148retry: 1169retry:
1149 handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); 1170 /*
1171 * In case of inline data, we may push out the data to a block,
1172 * So reserve the journal space first.
1173 */
1174 if (ext4_has_inline_data(inode))
1175 credits += ext4_writepage_trans_blocks(inode) + 1;
1176
1177 handle = ext4_journal_start(inode, credits);
1150 if (IS_ERR(handle)) { 1178 if (IS_ERR(handle)) {
1151 error = PTR_ERR(handle); 1179 error = PTR_ERR(handle);
1152 } else { 1180 } else {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 91f31ca7d9af..69eda787a96a 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -21,6 +21,7 @@
21#define EXT4_XATTR_INDEX_TRUSTED 4 21#define EXT4_XATTR_INDEX_TRUSTED 4
22#define EXT4_XATTR_INDEX_LUSTRE 5 22#define EXT4_XATTR_INDEX_LUSTRE 5
23#define EXT4_XATTR_INDEX_SECURITY 6 23#define EXT4_XATTR_INDEX_SECURITY 6
24#define EXT4_XATTR_INDEX_SYSTEM 7
24 25
25struct ext4_xattr_header { 26struct ext4_xattr_header {
26 __le32 h_magic; /* magic number for identification */ 27 __le32 h_magic; /* magic number for identification */
@@ -65,7 +66,32 @@ struct ext4_xattr_entry {
65 EXT4_I(inode)->i_extra_isize)) 66 EXT4_I(inode)->i_extra_isize))
66#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 67#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
67 68
68# ifdef CONFIG_EXT4_FS_XATTR 69#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
70#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
71#define BFIRST(bh) ENTRY(BHDR(bh)+1)
72#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
73
74#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
75
76struct ext4_xattr_info {
77 int name_index;
78 const char *name;
79 const void *value;
80 size_t value_len;
81};
82
83struct ext4_xattr_search {
84 struct ext4_xattr_entry *first;
85 void *base;
86 void *end;
87 struct ext4_xattr_entry *here;
88 int not_found;
89};
90
91struct ext4_xattr_ibody_find {
92 struct ext4_xattr_search s;
93 struct ext4_iloc iloc;
94};
69 95
70extern const struct xattr_handler ext4_xattr_user_handler; 96extern const struct xattr_handler ext4_xattr_user_handler;
71extern const struct xattr_handler ext4_xattr_trusted_handler; 97extern const struct xattr_handler ext4_xattr_trusted_handler;
@@ -90,60 +116,82 @@ extern void ext4_exit_xattr(void);
90 116
91extern const struct xattr_handler *ext4_xattr_handlers[]; 117extern const struct xattr_handler *ext4_xattr_handlers[];
92 118
93# else /* CONFIG_EXT4_FS_XATTR */ 119extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
94 120 struct ext4_xattr_ibody_find *is);
95static inline int 121extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
96ext4_xattr_get(struct inode *inode, int name_index, const char *name, 122 const char *name,
97 void *buffer, size_t size, int flags) 123 void *buffer, size_t buffer_size);
98{ 124extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
99 return -EOPNOTSUPP; 125 struct ext4_xattr_info *i,
100} 126 struct ext4_xattr_ibody_find *is);
101 127
102static inline int 128extern int ext4_has_inline_data(struct inode *inode);
103ext4_xattr_set(struct inode *inode, int name_index, const char *name, 129extern int ext4_get_inline_size(struct inode *inode);
104 const void *value, size_t size, int flags) 130extern int ext4_get_max_inline_size(struct inode *inode);
105{ 131extern int ext4_find_inline_data_nolock(struct inode *inode);
106 return -EOPNOTSUPP; 132extern void ext4_write_inline_data(struct inode *inode,
107} 133 struct ext4_iloc *iloc,
108 134 void *buffer, loff_t pos,
109static inline int 135 unsigned int len);
110ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, 136extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
111 const char *name, const void *value, size_t size, int flags) 137 unsigned int len);
112{ 138extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
113 return -EOPNOTSUPP; 139 unsigned int len);
114} 140extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
115 141
116static inline void 142extern int ext4_readpage_inline(struct inode *inode, struct page *page);
117ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) 143extern int ext4_try_to_write_inline_data(struct address_space *mapping,
118{ 144 struct inode *inode,
119} 145 loff_t pos, unsigned len,
120 146 unsigned flags,
121static inline void 147 struct page **pagep);
122ext4_xattr_put_super(struct super_block *sb) 148extern int ext4_write_inline_data_end(struct inode *inode,
123{ 149 loff_t pos, unsigned len,
124} 150 unsigned copied,
125 151 struct page *page);
126static __init inline int 152extern struct buffer_head *
127ext4_init_xattr(void) 153ext4_journalled_write_inline_data(struct inode *inode,
128{ 154 unsigned len,
129 return 0; 155 struct page *page);
130} 156extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
131 157 struct inode *inode,
132static inline void 158 loff_t pos, unsigned len,
133ext4_exit_xattr(void) 159 unsigned flags,
134{ 160 struct page **pagep,
135} 161 void **fsdata);
136 162extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
137static inline int 163 unsigned len, unsigned copied,
138ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 164 struct page *page);
139 struct ext4_inode *raw_inode, handle_t *handle) 165extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
140{ 166 struct inode *inode);
141 return -EOPNOTSUPP; 167extern int ext4_try_create_inline_dir(handle_t *handle,
142} 168 struct inode *parent,
143 169 struct inode *inode);
144#define ext4_xattr_handlers NULL 170extern int ext4_read_inline_dir(struct file *filp,
145 171 void *dirent, filldir_t filldir,
146# endif /* CONFIG_EXT4_FS_XATTR */ 172 int *has_inline_data);
173extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
174 const struct qstr *d_name,
175 struct ext4_dir_entry_2 **res_dir,
176 int *has_inline_data);
177extern int ext4_delete_inline_entry(handle_t *handle,
178 struct inode *dir,
179 struct ext4_dir_entry_2 *de_del,
180 struct buffer_head *bh,
181 int *has_inline_data);
182extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
183extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
184 struct ext4_dir_entry_2 **parent_de,
185 int *retval);
186extern int ext4_inline_data_fiemap(struct inode *inode,
187 struct fiemap_extent_info *fieinfo,
188 int *has_inline);
189extern int ext4_try_to_evict_inline_data(handle_t *handle,
190 struct inode *inode,
191 int needed);
192extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
193
194extern int ext4_convert_inline_data(struct inode *inode);
147 195
148#ifdef CONFIG_EXT4_FS_SECURITY 196#ifdef CONFIG_EXT4_FS_SECURITY
149extern int ext4_init_security(handle_t *handle, struct inode *inode, 197extern int ext4_init_security(handle_t *handle, struct inode *inode,