author	Linus Torvalds <torvalds@linux-foundation.org>	2017-07-09 12:31:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-07-09 12:31:22 -0400
commit	bc2c6421cbb420677c4bb56adaf434414770ce8a (patch)
tree	dc488ded5d21f28c82ca62acd23dbea299aaa5e1
parent	58f587cb0b603de3d8869e021d4fa704e065afa8 (diff)
parent	ff95015648df445999c8483270905f7d3dec51e1 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
 "The first major feature for ext4 this merge window is the largedir
  feature, which allows ext4 directories to support over 2 billion
  directory entries (assuming ~64 byte file names; in practice, users
  will run into practical performance limits first.) This feature was
  originally written by the Lustre team, and credit goes to Artem
  Blagodarenko from Seagate for getting this feature upstream.

  The second major feature allows ext4 to support extended attribute
  values up to 64k. This feature was also originally from Lustre, and
  has been enhanced by Tahsin Erdogan from Google with a deduplication
  feature so that if multiple files have the same xattr value (for
  example, Windows ACL's stored by Samba), only one copy will be stored
  on disk for encoding and caching efficiency.

  We also have the usual set of bug fixes, cleanups, and optimizations"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits)
  ext4: fix spelling mistake: "prellocated" -> "preallocated"
  ext4: fix __ext4_new_inode() journal credits calculation
  ext4: skip ext4_init_security() and encryption on ea_inodes
  fs: generic_block_bmap(): initialize all of the fields in the temp bh
  ext4: change fast symlink test to not rely on i_blocks
  ext4: require key for truncate(2) of encrypted file
  ext4: don't bother checking for encryption key in ->mmap()
  ext4: check return value of kstrtoull correctly in reserved_clusters_store
  ext4: fix off-by-one fsmap error on 1k block filesystems
  ext4: return EFSBADCRC if a bad checksum error is found in ext4_find_entry()
  ext4: return EIO on read error in ext4_find_entry
  ext4: forbid encrypting root directory
  ext4: send parallel discards on commit completions
  ext4: avoid unnecessary stalls in ext4_evict_inode()
  ext4: add nombcache mount option
  ext4: strong binding of xattr inode references
  ext4: eliminate xattr entry e_hash recalculation for removes
  ext4: reserve space for xattr entries/names
  quota: add get_inode_usage callback to transfer multi-inode charges
  ext4: xattr inode deduplication
  ...
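Neither of the two new on-disk features is turned on by default; as a rough, hedged
sketch (feature names are the e2fsprogs ones, /dev/vdb and /mnt are placeholders, and
an e2fsprogs new enough to recognize both features is assumed), enabling them and the
new mount option from this series might look like:

    # make a test filesystem with the largedir and xattr-inode features enabled
    mkfs.ext4 -O large_dir,ea_inode /dev/vdb
    # the series also adds a mount option that disables the xattr mbcache
    mount -o nombcache /dev/vdb /mnt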
-rw-r--r--  fs/buffer.c                      |    8
-rw-r--r--  fs/crypto/policy.c               |    1
-rw-r--r--  fs/ext2/ext2.h                   |    2
-rw-r--r--  fs/ext2/super.c                  |   16
-rw-r--r--  fs/ext2/xattr.c                  |   48
-rw-r--r--  fs/ext4/acl.c                    |   21
-rw-r--r--  fs/ext4/ext4.h                   |   63
-rw-r--r--  fs/ext4/ext4_jbd2.h              |   23
-rw-r--r--  fs/ext4/extents.c                |    3
-rw-r--r--  fs/ext4/file.c                   |    7
-rw-r--r--  fs/ext4/fsmap.c                  |    4
-rw-r--r--  fs/ext4/ialloc.c                 |   76
-rw-r--r--  fs/ext4/indirect.c               |    3
-rw-r--r--  fs/ext4/inline.c                 |    2
-rw-r--r--  fs/ext4/inode.c                  |   92
-rw-r--r--  fs/ext4/ioctl.c                  |   10
-rw-r--r--  fs/ext4/mballoc.c                |  145
-rw-r--r--  fs/ext4/mballoc.h                |    6
-rw-r--r--  fs/ext4/migrate.c                |    2
-rw-r--r--  fs/ext4/move_extent.c            |    2
-rw-r--r--  fs/ext4/namei.c                  |  131
-rw-r--r--  fs/ext4/super.c                  |  109
-rw-r--r--  fs/ext4/sysfs.c                  |    2
-rw-r--r--  fs/ext4/xattr.c                  | 1699
-rw-r--r--  fs/ext4/xattr.h                  |   35
-rw-r--r--  fs/mbcache.c                     |   52
-rw-r--r--  fs/quota/dquot.c                 |   16
-rw-r--r--  include/linux/fscrypt_common.h   |    3
-rw-r--r--  include/linux/mbcache.h          |   11
-rw-r--r--  include/linux/quota.h            |    2
30 files changed, 2081 insertions(+), 513 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 5234b15377c2..233e2983c5db 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3031,11 +3031,11 @@ EXPORT_SYMBOL(block_write_full_page);
 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 			    get_block_t *get_block)
 {
-	struct buffer_head tmp;
 	struct inode *inode = mapping->host;
-	tmp.b_state = 0;
-	tmp.b_blocknr = 0;
-	tmp.b_size = i_blocksize(inode);
+	struct buffer_head tmp = {
+		.b_size = i_blocksize(inode),
+	};
+
 	get_block(inode, block, &tmp, 0);
 	return tmp.b_blocknr;
 }
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 9914d51dff86..ce07a86200f3 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -256,6 +256,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
 	memcpy(ctx.master_key_descriptor, ci->ci_master_key,
 	       FS_KEY_DESCRIPTOR_SIZE);
 	get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+	BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
 	res = parent->i_sb->s_cop->set_context(child, &ctx,
 						sizeof(ctx), fs_data);
 	if (res)
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 03f5ce1d3dbe..23ebb92484c6 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -113,7 +113,7 @@ struct ext2_sb_info {
 	 * of the mount options.
 	 */
 	spinlock_t s_lock;
-	struct mb_cache *s_mb_cache;
+	struct mb_cache *s_ea_block_cache;
 };
 
 static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9c2028b50e5c..7b1bc9059863 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -147,9 +147,9 @@ static void ext2_put_super (struct super_block * sb)
 
 	ext2_quota_off_umount(sb);
 
-	if (sbi->s_mb_cache) {
-		ext2_xattr_destroy_cache(sbi->s_mb_cache);
-		sbi->s_mb_cache = NULL;
+	if (sbi->s_ea_block_cache) {
+		ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
+		sbi->s_ea_block_cache = NULL;
 	}
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
@@ -1131,9 +1131,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 #ifdef CONFIG_EXT2_FS_XATTR
-	sbi->s_mb_cache = ext2_xattr_create_cache();
-	if (!sbi->s_mb_cache) {
-		ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+	sbi->s_ea_block_cache = ext2_xattr_create_cache();
+	if (!sbi->s_ea_block_cache) {
+		ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
 		goto failed_mount3;
 	}
 #endif
@@ -1182,8 +1182,8 @@ cantfind_ext2:
 			sb->s_id);
 	goto failed_mount;
 failed_mount3:
-	if (sbi->s_mb_cache)
-		ext2_xattr_destroy_cache(sbi->s_mb_cache);
+	if (sbi->s_ea_block_cache)
+		ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fbdb8f171893..1b9b1268d418 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -121,6 +121,8 @@ const struct xattr_handler *ext2_xattr_handlers[] = {
 	NULL
 };
 
+#define EA_BLOCK_CACHE(inode)	(EXT2_SB(inode->i_sb)->s_ea_block_cache)
+
 static inline const struct xattr_handler *
 ext2_xattr_handler(int name_index)
 {
@@ -150,7 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
 	size_t name_len, size;
 	char *end;
 	int error;
-	struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 
 	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
 		  name_index, name, buffer, (long)buffer_size);
@@ -195,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
 			goto found;
 		entry = next;
 	}
-	if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+	if (ext2_xattr_cache_insert(ea_block_cache, bh))
 		ea_idebug(inode, "cache insert failed");
 	error = -ENODATA;
 	goto cleanup;
@@ -208,7 +210,7 @@ found:
 	    le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
 		goto bad_block;
 
-	if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+	if (ext2_xattr_cache_insert(ea_block_cache, bh))
 		ea_idebug(inode, "cache insert failed");
 	if (buffer) {
 		error = -ERANGE;
@@ -246,7 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	char *end;
 	size_t rest = buffer_size;
 	int error;
-	struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 
 	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
 		  buffer, (long)buffer_size);
@@ -281,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
 			goto bad_block;
 		entry = next;
 	}
-	if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+	if (ext2_xattr_cache_insert(ea_block_cache, bh))
 		ea_idebug(inode, "cache insert failed");
 
 	/* list the attribute names */
@@ -493,8 +495,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
 			 * This must happen under buffer lock for
 			 * ext2_xattr_set2() to reliably detect modified block
 			 */
-			mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
-						    hash, bh->b_blocknr);
+			mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
+					      bh->b_blocknr);
 
 			/* keep the buffer locked while modifying it. */
 		} else {
@@ -627,7 +629,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *new_bh = NULL;
 	int error;
-	struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
+	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 
 	if (header) {
 		new_bh = ext2_xattr_cache_find(inode, header);
@@ -655,7 +657,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			   don't need to change the reference count. */
 			new_bh = old_bh;
 			get_bh(new_bh);
-			ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+			ext2_xattr_cache_insert(ea_block_cache, new_bh);
 		} else {
 			/* We need to allocate a new block */
 			ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -676,7 +678,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			memcpy(new_bh->b_data, header, new_bh->b_size);
 			set_buffer_uptodate(new_bh);
 			unlock_buffer(new_bh);
-			ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+			ext2_xattr_cache_insert(ea_block_cache, new_bh);
 
 			ext2_xattr_update_super_block(sb);
 		}
@@ -721,8 +723,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			 * This must happen under buffer lock for
 			 * ext2_xattr_set2() to reliably detect freed block
 			 */
-			mb_cache_entry_delete_block(ext2_mb_cache,
-						    hash, old_bh->b_blocknr);
+			mb_cache_entry_delete(ea_block_cache, hash,
+					      old_bh->b_blocknr);
 			/* Free the old block. */
 			ea_bdebug(old_bh, "freeing");
 			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
@@ -795,8 +797,8 @@ ext2_xattr_delete_inode(struct inode *inode)
 	 * This must happen under buffer lock for ext2_xattr_set2() to
 	 * reliably detect freed block
 	 */
-	mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
-				    hash, bh->b_blocknr);
+	mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
+			      bh->b_blocknr);
 	ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 	get_bh(bh);
 	bforget(bh);
@@ -897,21 +899,21 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
 {
 	__u32 hash = le32_to_cpu(header->h_hash);
 	struct mb_cache_entry *ce;
-	struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 
 	if (!header->h_hash)
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
+	ce = mb_cache_entry_find_first(ea_block_cache, hash);
 	while (ce) {
 		struct buffer_head *bh;
 
-		bh = sb_bread(inode->i_sb, ce->e_block);
+		bh = sb_bread(inode->i_sb, ce->e_value);
 		if (!bh) {
 			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
 				"inode %ld: block %ld read error",
-				inode->i_ino, (unsigned long) ce->e_block);
+				inode->i_ino, (unsigned long) ce->e_value);
 		} else {
 			lock_buffer(bh);
 			/*
@@ -924,27 +926,27 @@ again:
 			 * entry is still hashed is reliable.
 			 */
 			if (hlist_bl_unhashed(&ce->e_hash_list)) {
-				mb_cache_entry_put(ext2_mb_cache, ce);
+				mb_cache_entry_put(ea_block_cache, ce);
 				unlock_buffer(bh);
 				brelse(bh);
 				goto again;
 			} else if (le32_to_cpu(HDR(bh)->h_refcount) >
 				   EXT2_XATTR_REFCOUNT_MAX) {
 				ea_idebug(inode, "block %ld refcount %d>%d",
-					  (unsigned long) ce->e_block,
+					  (unsigned long) ce->e_value,
 					  le32_to_cpu(HDR(bh)->h_refcount),
 					  EXT2_XATTR_REFCOUNT_MAX);
 			} else if (!ext2_xattr_cmp(header, HDR(bh))) {
 				ea_bdebug(bh, "b_count=%d",
 					  atomic_read(&(bh->b_count)));
-				mb_cache_entry_touch(ext2_mb_cache, ce);
-				mb_cache_entry_put(ext2_mb_cache, ce);
+				mb_cache_entry_touch(ea_block_cache, ce);
+				mb_cache_entry_put(ea_block_cache, ce);
 				return bh;
 			}
 			unlock_buffer(bh);
 			brelse(bh);
 		}
-		ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
+		ce = mb_cache_entry_find_next(ea_block_cache, ce);
 	}
 	return NULL;
 }
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3ec0e46de95f..09441ae07a5b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type)
  */
 static int
 __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
-	       struct posix_acl *acl)
+	       struct posix_acl *acl, int xattr_flags)
 {
 	int name_index;
 	void *value = NULL;
@@ -218,7 +218,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	}
 
 	error = ext4_xattr_set_handle(handle, inode, name_index, "",
-				      value, size, 0);
+				      value, size, xattr_flags);
 
 	kfree(value);
 	if (!error)
@@ -231,18 +231,23 @@ int
 ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	handle_t *handle;
-	int error, retries = 0;
+	int error, credits, retries = 0;
+	size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0;
 
 	error = dquot_initialize(inode);
 	if (error)
 		return error;
 retry:
-	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
-				    ext4_jbd2_credits_xattr(inode));
+	error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */,
+				       &credits);
+	if (error)
+		return error;
+
+	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	error = __ext4_set_acl(handle, inode, type, acl);
+	error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
 	ext4_journal_stop(handle);
 	if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
@@ -267,13 +272,13 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 
 	if (default_acl) {
 		error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
-				       default_acl);
+				       default_acl, XATTR_CREATE);
 		posix_acl_release(default_acl);
 	}
 	if (acl) {
 		if (!error)
 			error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
-					       acl);
+					       acl, XATTR_CREATE);
 		posix_acl_release(acl);
 	}
 	return error;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 32191548abed..9ebde0cd632e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1114,6 +1114,7 @@ struct ext4_inode_info {
 /*
  * Mount flags set via mount options or defaults
  */
+#define EXT4_MOUNT_NO_MBCACHE		0x00001 /* Do not use mbcache */
 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
 #define EXT4_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
@@ -1444,6 +1445,8 @@ struct ext4_sb_info {
 	unsigned int *s_mb_maxs;
 	unsigned int s_group_info_size;
 	unsigned int s_mb_free_pending;
+	struct list_head s_freed_data_list;	/* List of blocks to be freed
+						   after commit completed */
 
 	/* tunables */
 	unsigned long s_stripe;
@@ -1516,7 +1519,8 @@ struct ext4_sb_info {
 	struct list_head s_es_list;	/* List of inodes with reclaimable extents */
 	long s_es_nr_inode;
 	struct ext4_es_stats s_es_stats;
-	struct mb_cache *s_mb_cache;
+	struct mb_cache *s_ea_block_cache;
+	struct mb_cache *s_ea_inode_cache;
 	spinlock_t s_es_lock ____cacheline_aligned_in_smp;
 
 	/* Ratelimit ext4 messages. */
@@ -1797,10 +1801,12 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
 					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
 					 EXT4_FEATURE_INCOMPAT_64BIT| \
 					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+					 EXT4_FEATURE_INCOMPAT_EA_INODE| \
 					 EXT4_FEATURE_INCOMPAT_MMP | \
 					 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
 					 EXT4_FEATURE_INCOMPAT_ENCRYPT | \
-					 EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+					 EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
+					 EXT4_FEATURE_INCOMPAT_LARGEDIR)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2098,6 +2104,12 @@ static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
 	return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
 }
 
+static inline bool ext4_is_quota_file(struct inode *inode)
+{
+	return IS_NOQUOTA(inode) &&
+	       !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL);
+}
+
 /*
  * This structure is stuffed into the struct file's private_data field
  * for directories.  It is where we put information so that we can do
@@ -2126,6 +2138,16 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
  */
 #define ERR_BAD_DX_DIR	(-(MAX_ERRNO - 1))
 
+/* htree levels for ext4 */
+#define	EXT4_HTREE_LEVEL_COMPAT	2
+#define	EXT4_HTREE_LEVEL	3
+
+static inline int ext4_dir_htree_level(struct super_block *sb)
+{
+	return ext4_has_feature_largedir(sb) ?
+		EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
+}
+
 /*
  * Timeout and state flag for lazy initialization inode thread.
  */
@@ -2389,16 +2411,17 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 /* ialloc.c */
 extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
 				      const struct qstr *qstr, __u32 goal,
-				      uid_t *owner, int handle_type,
-				      unsigned int line_no, int nblocks);
+				      uid_t *owner, __u32 i_flags,
+				      int handle_type, unsigned int line_no,
+				      int nblocks);
 
-#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
-	__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
-			 0, 0, 0)
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
+	__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
+			 i_flags, 0, 0, 0)
 #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
 				    type, nblocks)		    \
 	__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
-			 (type), __LINE__, (nblocks))
+			 0, (type), __LINE__, (nblocks))
 
 
 extern void ext4_free_inode(handle_t *, struct inode *);
@@ -2433,6 +2456,7 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 				ext4_fsblk_t block, unsigned long count);
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
 
 /* inode.c */
 int ext4_inode_is_fast_symlink(struct inode *inode);
@@ -2704,19 +2728,20 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
 extern int ext4_register_li_request(struct super_block *sb,
 				    ext4_group_t first_not_zeroed);
 
-static inline int ext4_has_group_desc_csum(struct super_block *sb)
-{
-	return ext4_has_feature_gdt_csum(sb) ||
-	       EXT4_SB(sb)->s_chksum_driver != NULL;
-}
-
 static inline int ext4_has_metadata_csum(struct super_block *sb)
 {
 	WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
 		     !EXT4_SB(sb)->s_chksum_driver);
 
-	return (EXT4_SB(sb)->s_chksum_driver != NULL);
+	return ext4_has_feature_metadata_csum(sb) &&
+	       (EXT4_SB(sb)->s_chksum_driver != NULL);
 }
+
+static inline int ext4_has_group_desc_csum(struct super_block *sb)
+{
+	return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
+}
+
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
 	return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2756,13 +2781,15 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
 	es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
 }
 
-static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
+static inline loff_t ext4_isize(struct super_block *sb,
+				struct ext4_inode *raw_inode)
 {
-	if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
+	if (ext4_has_feature_largedir(sb) ||
+	    S_ISREG(le16_to_cpu(raw_inode->i_mode)))
 		return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
 			le32_to_cpu(raw_inode->i_size_lo);
-	else
-		return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
+
+	return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
 }
 
 static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f97611171023..dabad1bc8617 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -77,7 +77,14 @@
 
 #define EXT4_RESERVE_TRANS_BLOCKS	12U
 
-#define EXT4_INDEX_EXTRA_TRANS_BLOCKS	8
+/*
+ * Number of credits needed if we need to insert an entry into a
+ * directory.  For each new index block, we need 4 blocks (old index
+ * block, new index block, bitmap block, bg summary).  For normal
+ * htree directories there are 2 levels; if the largedir feature
+ * enabled it's 3 levels.
+ */
+#define EXT4_INDEX_EXTRA_TRANS_BLOCKS	12U
 
 #ifdef CONFIG_QUOTA
 /* Amount of blocks needed for quota update - we know that the structure was
@@ -104,20 +111,6 @@
 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
 
-static inline int ext4_jbd2_credits_xattr(struct inode *inode)
-{
-	int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
-
-	/*
-	 * In case of inline data, we may push out the data to a block,
-	 * so we need to reserve credits for this eventuality
-	 */
-	if (ext4_has_inline_data(inode))
-		credits += ext4_writepage_trans_blocks(inode) + 1;
-	return credits;
-}
-
-
 /*
  * Ext4 handle operation types -- for logging purposes
  */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3e36508610b7..e0a8425ff74d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2488,7 +2488,8 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
 
 static inline int get_default_free_blocks_flags(struct inode *inode)
 {
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+	    ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
 		return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
 	else if (ext4_should_journal_data(inode))
 		return EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 58e2eeaa0bc4..58294c9a7e1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -364,13 +364,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
-	if (ext4_encrypted_inode(inode)) {
-		int err = fscrypt_get_encryption_info(inode);
-		if (err)
-			return 0;
-		if (!fscrypt_has_encryption_key(inode))
-			return -ENOKEY;
-	}
 	file_accessed(file);
 	if (IS_DAX(file_inode(file))) {
 		vma->vm_ops = &ext4_dax_vm_ops;
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index b19436098837..7ec340898598 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -480,6 +480,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t start_fsb;
 	ext4_fsblk_t end_fsb;
+	ext4_fsblk_t bofs;
 	ext4_fsblk_t eofs;
 	ext4_group_t start_ag;
 	ext4_group_t end_ag;
@@ -487,9 +488,12 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
 	ext4_grpblk_t last_cluster;
 	int error = 0;
 
+	bofs = le32_to_cpu(sbi->s_es->s_first_data_block);
 	eofs = ext4_blocks_count(sbi->s_es);
 	if (keys[0].fmr_physical >= eofs)
 		return 0;
+	else if (keys[0].fmr_physical < bofs)
+		keys[0].fmr_physical = bofs;
 	if (keys[1].fmr_physical >= eofs)
 		keys[1].fmr_physical = eofs - 1;
 	start_fsb = keys[0].fmr_physical;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 98ac2f1f23b3..507bfb3344d4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	 * as writing the quota to disk may need the lock as well.
 	 */
 	dquot_initialize(inode);
-	ext4_xattr_delete_inode(handle, inode);
 	dquot_free_inode(inode);
 	dquot_drop(inode);
 
@@ -743,8 +742,9 @@ out:
  */
 struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 			       umode_t mode, const struct qstr *qstr,
-			       __u32 goal, uid_t *owner, int handle_type,
-			       unsigned int line_no, int nblocks)
+			       __u32 goal, uid_t *owner, __u32 i_flags,
+			       int handle_type, unsigned int line_no,
+			       int nblocks)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -766,30 +766,69 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 	if (!dir || !dir->i_nlink)
 		return ERR_PTR(-EPERM);
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+	sb = dir->i_sb;
+	sbi = EXT4_SB(sb);
+
+	if (unlikely(ext4_forced_shutdown(sbi)))
 		return ERR_PTR(-EIO);
 
-	if ((ext4_encrypted_inode(dir) ||
-	     DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
-	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+	if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
+	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
+	    !(i_flags & EXT4_EA_INODE_FL)) {
 		err = fscrypt_get_encryption_info(dir);
 		if (err)
 			return ERR_PTR(err);
 		if (!fscrypt_has_encryption_key(dir))
 			return ERR_PTR(-ENOKEY);
-		if (!handle)
-			nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
 		encrypt = 1;
 	}
 
-	sb = dir->i_sb;
+	if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) {
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+		struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
+
+		if (p) {
+			int acl_size = p->a_count * sizeof(ext4_acl_entry);
+
+			nblocks += (S_ISDIR(mode) ? 2 : 1) *
+				__ext4_xattr_set_credits(sb, NULL /* inode */,
+					NULL /* block_bh */, acl_size,
+					true /* is_create */);
+			posix_acl_release(p);
+		}
+#endif
+
+#ifdef CONFIG_SECURITY
+		{
+			int num_security_xattrs = 1;
+
+#ifdef CONFIG_INTEGRITY
+			num_security_xattrs++;
+#endif
+			/*
+			 * We assume that security xattrs are never
+			 * more than 1k.  In practice they are under
+			 * 128 bytes.
+			 */
+			nblocks += num_security_xattrs *
+				__ext4_xattr_set_credits(sb, NULL /* inode */,
					NULL /* block_bh */, 1024,
+					true /* is_create */);
+		}
+#endif
+		if (encrypt)
+			nblocks += __ext4_xattr_set_credits(sb,
+					NULL /* inode */, NULL /* block_bh */,
+					FSCRYPT_SET_CONTEXT_MAX_SIZE,
+					true /* is_create */);
+	}
+
 	ngroups = ext4_get_groups_count(sb);
 	trace_ext4_request_inode(dir, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
-	sbi = EXT4_SB(sb);
 
 	/*
 	 * Initialize owners and quota early so that we don't have to account
@@ -1053,6 +1092,7 @@ got:
 	/* Don't inherit extent flag from directory, amongst others. */
 	ei->i_flags =
 		ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+	ei->i_flags |= i_flags;
 	ei->i_file_acl = 0;
 	ei->i_dtime = 0;
 	ei->i_block_group = group;
@@ -1109,13 +1149,15 @@ got:
 		goto fail_free_drop;
 	}
 
-	err = ext4_init_acl(handle, inode, dir);
-	if (err)
-		goto fail_free_drop;
+	if (!(ei->i_flags & EXT4_EA_INODE_FL)) {
+		err = ext4_init_acl(handle, inode, dir);
+		if (err)
+			goto fail_free_drop;
 
-	err = ext4_init_security(handle, inode, dir, qstr);
-	if (err)
-		goto fail_free_drop;
+		err = ext4_init_security(handle, inode, dir, qstr);
+		if (err)
+			goto fail_free_drop;
+	}
 
 	if (ext4_has_feature_extents(sb)) {
 		/* set extent flag only for directory, file and normal symlink*/
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bc15c2c17633..7ffa290cbb8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -829,7 +829,8 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 	int	flags = EXT4_FREE_BLOCKS_VALIDATED;
 	int	err;
 
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+	    ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
 		flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
 	else if (ext4_should_journal_data(inode))
 		flags |= EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8d141c0c8ff9..28c5c3abddb3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
 
 	/* Compute min_offs. */
 	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
-		if (!entry->e_value_block && entry->e_value_size) {
+		if (!entry->e_value_inum && entry->e_value_size) {
 			size_t offs = le16_to_cpu(entry->e_value_offs);
 			if (offs < min_offs)
 				min_offs = offs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cf82d03968c..3c600f02673f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -144,16 +144,12 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 
 /*
  * Test whether an inode is a fast symlink.
+ * A fast symlink has its symlink data stored in ext4_inode_info->i_data.
  */
 int ext4_inode_is_fast_symlink(struct inode *inode)
 {
-	int ea_blocks = EXT4_I(inode)->i_file_acl ?
-		EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
-
-	if (ext4_has_inline_data(inode))
-		return 0;
-
-	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+	return S_ISLNK(inode->i_mode) && inode->i_size &&
+	       (inode->i_size < EXT4_N_BLOCKS * 4);
 }
 
 /*
@@ -189,6 +185,8 @@ void ext4_evict_inode(struct inode *inode)
 {
 	handle_t *handle;
 	int err;
+	int extra_credits = 3;
+	struct ext4_xattr_inode_array *ea_inode_array = NULL;
 
 	trace_ext4_evict_inode(inode);
 
@@ -213,7 +211,8 @@ void ext4_evict_inode(struct inode *inode)
 	 */
 	if (inode->i_ino != EXT4_JOURNAL_INO &&
 	    ext4_should_journal_data(inode) &&
-	    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+	    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+	    inode->i_data.nrpages) {
 		journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 		tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
@@ -238,8 +237,12 @@ void ext4_evict_inode(struct inode *inode)
 	 * protection against it
 	 */
 	sb_start_intwrite(inode->i_sb);
+
+	if (!IS_NOQUOTA(inode))
+		extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
+
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
-				    ext4_blocks_for_truncate(inode)+3);
+			    ext4_blocks_for_truncate(inode)+extra_credits);
 	if (IS_ERR(handle)) {
 		ext4_std_error(inode->i_sb, PTR_ERR(handle));
 		/*
@@ -254,6 +257,16 @@ void ext4_evict_inode(struct inode *inode)
 
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
+
+	/*
+	 * Set inode->i_size to 0 before calling ext4_truncate(). We need
+	 * special handling of symlinks here because i_size is used to
+	 * determine whether ext4_inode_info->i_data contains symlink data or
+	 * block mappings. Setting i_size to 0 will remove its fast symlink
+	 * status. Erase i_data so that it becomes a valid empty block map.
+	 */
+	if (ext4_inode_is_fast_symlink(inode))
+		memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
 	inode->i_size = 0;
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
@@ -271,25 +284,17 @@ void ext4_evict_inode(struct inode *inode)
 		}
 	}
 
-	/*
-	 * ext4_ext_truncate() doesn't reserve any slop when it
-	 * restarts journal transactions; therefore there may not be
-	 * enough credits left in the handle to remove the inode from
-	 * the orphan list and set the dtime field.
-	 */
-	if (!ext4_handle_has_enough_credits(handle, 3)) {
-		err = ext4_journal_extend(handle, 3);
-		if (err > 0)
-			err = ext4_journal_restart(handle, 3);
-		if (err != 0) {
-			ext4_warning(inode->i_sb,
-				     "couldn't extend journal (err %d)", err);
-		stop_handle:
-			ext4_journal_stop(handle);
-			ext4_orphan_del(NULL, inode);
-			sb_end_intwrite(inode->i_sb);
-			goto no_delete;
-		}
+	/* Remove xattr references. */
+	err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
+				      extra_credits);
+	if (err) {
+		ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
+stop_handle:
+		ext4_journal_stop(handle);
+		ext4_orphan_del(NULL, inode);
+		sb_end_intwrite(inode->i_sb);
+		ext4_xattr_inode_array_free(ea_inode_array);
+		goto no_delete;
 	}
 
 	/*
@@ -317,6 +322,7 @@ void ext4_evict_inode(struct inode *inode)
 	ext4_free_inode(handle, inode);
 	ext4_journal_stop(handle);
 	sb_end_intwrite(inode->i_sb);
+	ext4_xattr_inode_array_free(ea_inode_array);
 	return;
 no_delete:
 	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
@@ -710,7 +716,7 @@ out_sem:
 		if (map->m_flags & EXT4_MAP_NEW &&
 		    !(map->m_flags & EXT4_MAP_UNWRITTEN) &&
 		    !(flags & EXT4_GET_BLOCKS_ZERO) &&
-		    !IS_NOQUOTA(inode) &&
+		    !ext4_is_quota_file(inode) &&
 		    ext4_should_order_data(inode)) {
 			if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
 				ret = ext4_jbd2_inode_add_wait(handle, inode);
@@ -4712,7 +4718,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	if (ext4_has_feature_64bit(sb))
 		ei->i_file_acl |=
 			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
-	inode->i_size = ext4_isize(raw_inode);
+	inode->i_size = ext4_isize(sb, raw_inode);
 	if ((size = i_size_read(inode)) < 0) {
 		EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
 		ret = -EFSCORRUPTED;
@@ -4846,6 +4852,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	}
 	brelse(iloc.bh);
 	ext4_set_inode_flags(inode);
+
+	if (ei->i_flags & EXT4_EA_INODE_FL) {
+		ext4_xattr_inode_set_class(inode);
+
+		inode_lock(inode);
+		inode->i_flags |= S_NOQUOTA;
+		inode_unlock(inode);
+	}
+
 	unlock_new_inode(inode);
 	return inode;
 
@@ -5037,7 +5052,7 @@ static int ext4_do_update_inode(handle_t *handle,
 		raw_inode->i_file_acl_high =
 			cpu_to_le16(ei->i_file_acl >> 32);
 	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-	if (ei->i_disksize != ext4_isize(raw_inode)) {
+	if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
 		ext4_isize_set(raw_inode, ei->i_disksize);
 		need_datasync = 1;
 	}
@@ -5287,7 +5302,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
+
+		/* dquot_transfer() calls back ext4_get_inode_usage() which
+		 * counts xattr inode references.
+		 */
+		down_read(&EXT4_I(inode)->xattr_sem);
 		error = dquot_transfer(inode, attr);
+		up_read(&EXT4_I(inode)->xattr_sem);
+
 		if (error) {
 			ext4_journal_stop(handle);
 			return error;
@@ -5307,6 +5329,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		loff_t oldsize = inode->i_size;
 		int shrink = (attr->ia_size <= inode->i_size);
 
+		if (ext4_encrypted_inode(inode)) {
+			error = fscrypt_get_encryption_info(inode);
+			if (error)
+				return error;
+			if (!fscrypt_has_encryption_key(inode))
+				return -ENOKEY;
+		}
+
 		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
 			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0c21e22acd74..42b3a73143cf 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -218,7 +218,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
 	unsigned int jflag;
 
 	/* Is it quota file? Do not allow user to mess with it */
-	if (IS_NOQUOTA(inode))
+	if (ext4_is_quota_file(inode))
 		goto flags_out;
 
 	oldflags = ei->i_flags;
@@ -342,7 +342,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
 	err = -EPERM;
 	inode_lock(inode);
 	/* Is it quota file? Do not allow user to mess with it */
-	if (IS_NOQUOTA(inode))
+	if (ext4_is_quota_file(inode))
 		goto out_unlock;
 
 	err = ext4_get_inode_loc(inode, &iloc);
@@ -373,7 +373,13 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
 
 	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
 	if (!IS_ERR(transfer_to[PRJQUOTA])) {
+
+		/* __dquot_transfer() calls back ext4_get_inode_usage() which
+		 * counts xattr inode references.
+		 */
+		down_read(&EXT4_I(inode)->xattr_sem);
 		err = __dquot_transfer(inode, transfer_to);
+		up_read(&EXT4_I(inode)->xattr_sem);
 		dqput(transfer_to[PRJQUOTA]);
 		if (err)
 			goto out_dirty;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b7928cddd539..581e357e8406 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -367,8 +367,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
367 ext4_group_t group); 367 ext4_group_t group);
368static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 368static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
369 ext4_group_t group); 369 ext4_group_t group);
370static void ext4_free_data_callback(struct super_block *sb,
371 struct ext4_journal_cb_entry *jce, int rc);
372 370
373static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 371static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
374{ 372{
@@ -2639,6 +2637,7 @@ int ext4_mb_init(struct super_block *sb)
2639 spin_lock_init(&sbi->s_md_lock); 2637 spin_lock_init(&sbi->s_md_lock);
2640 spin_lock_init(&sbi->s_bal_lock); 2638 spin_lock_init(&sbi->s_bal_lock);
2641 sbi->s_mb_free_pending = 0; 2639 sbi->s_mb_free_pending = 0;
2640 INIT_LIST_HEAD(&sbi->s_freed_data_list);
2642 2641
2643 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; 2642 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2644 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; 2643 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -2782,7 +2781,8 @@ int ext4_mb_release(struct super_block *sb)
2782} 2781}
2783 2782
2784static inline int ext4_issue_discard(struct super_block *sb, 2783static inline int ext4_issue_discard(struct super_block *sb,
2785 ext4_group_t block_group, ext4_grpblk_t cluster, int count) 2784 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
2785 struct bio **biop)
2786{ 2786{
2787 ext4_fsblk_t discard_block; 2787 ext4_fsblk_t discard_block;
2788 2788
@@ -2791,18 +2791,18 @@ static inline int ext4_issue_discard(struct super_block *sb,
2791 count = EXT4_C2B(EXT4_SB(sb), count); 2791 count = EXT4_C2B(EXT4_SB(sb), count);
2792 trace_ext4_discard_blocks(sb, 2792 trace_ext4_discard_blocks(sb,
2793 (unsigned long long) discard_block, count); 2793 (unsigned long long) discard_block, count);
2794 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2794 if (biop) {
2795 return __blkdev_issue_discard(sb->s_bdev,
2796 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
2797 (sector_t)count << (sb->s_blocksize_bits - 9),
2798 GFP_NOFS, 0, biop);
2799 } else
2800 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2795} 2801}
2796 2802
2797/* 2803static void ext4_free_data_in_buddy(struct super_block *sb,
2798 * This function is called by the jbd2 layer once the commit has finished, 2804 struct ext4_free_data *entry)
2799 * so we know we can free the blocks that were released with that commit.
2800 */
2801static void ext4_free_data_callback(struct super_block *sb,
2802 struct ext4_journal_cb_entry *jce,
2803 int rc)
2804{ 2805{
2805 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2806 struct ext4_buddy e4b; 2806 struct ext4_buddy e4b;
2807 struct ext4_group_info *db; 2807 struct ext4_group_info *db;
2808 int err, count = 0, count2 = 0; 2808 int err, count = 0, count2 = 0;
@@ -2810,18 +2810,6 @@ static void ext4_free_data_callback(struct super_block *sb,
2810 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2810 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2811 entry->efd_count, entry->efd_group, entry); 2811 entry->efd_count, entry->efd_group, entry);
2812 2812
2813 if (test_opt(sb, DISCARD)) {
2814 err = ext4_issue_discard(sb, entry->efd_group,
2815 entry->efd_start_cluster,
2816 entry->efd_count);
2817 if (err && err != -EOPNOTSUPP)
2818 ext4_msg(sb, KERN_WARNING, "discard request in"
2819 " group:%d block:%d count:%d failed"
2820 " with %d", entry->efd_group,
2821 entry->efd_start_cluster,
2822 entry->efd_count, err);
2823 }
2824
2825 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); 2813 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2826 /* we expect to find existing buddy because it's pinned */ 2814 /* we expect to find existing buddy because it's pinned */
2827 BUG_ON(err != 0); 2815 BUG_ON(err != 0);
@@ -2862,6 +2850,56 @@ static void ext4_free_data_callback(struct super_block *sb,
2862 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2850 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2863} 2851}
2864 2852
2853/*
2854 * This function is called by the jbd2 layer once the commit has finished,
2855 * so we know we can free the blocks that were released with that commit.
2856 */
2857void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
2858{
2859 struct ext4_sb_info *sbi = EXT4_SB(sb);
2860 struct ext4_free_data *entry, *tmp;
2861 struct bio *discard_bio = NULL;
2862 struct list_head freed_data_list;
2863 struct list_head *cut_pos = NULL;
2864 int err;
2865
2866 INIT_LIST_HEAD(&freed_data_list);
2867
2868 spin_lock(&sbi->s_md_lock);
2869 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
2870 if (entry->efd_tid != commit_tid)
2871 break;
2872 cut_pos = &entry->efd_list;
2873 }
2874 if (cut_pos)
2875 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
2876 cut_pos);
2877 spin_unlock(&sbi->s_md_lock);
2878
2879 if (test_opt(sb, DISCARD)) {
2880 list_for_each_entry(entry, &freed_data_list, efd_list) {
2881 err = ext4_issue_discard(sb, entry->efd_group,
2882 entry->efd_start_cluster,
2883 entry->efd_count,
2884 &discard_bio);
2885 if (err && err != -EOPNOTSUPP) {
2886 ext4_msg(sb, KERN_WARNING, "discard request in"
2887 " group:%d block:%d count:%d failed"
2888 " with %d", entry->efd_group,
2889 entry->efd_start_cluster,
2890 entry->efd_count, err);
2891 } else if (err == -EOPNOTSUPP)
2892 break;
2893 }
2894
2895 if (discard_bio)
2896 submit_bio_wait(discard_bio);
2897 }
2898
2899 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
2900 ext4_free_data_in_buddy(sb, entry);
2901}
2902
2865int __init ext4_init_mballoc(void) 2903int __init ext4_init_mballoc(void)
2866{ 2904{
2867 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, 2905 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -3529,7 +3567,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3529 ext4_set_bits(bitmap, start, len); 3567 ext4_set_bits(bitmap, start, len);
3530 preallocated += len; 3568 preallocated += len;
3531 } 3569 }
3532 mb_debug(1, "prellocated %u for group %u\n", preallocated, group); 3570 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3533} 3571}
3534 3572
3535static void ext4_mb_pa_callback(struct rcu_head *head) 3573static void ext4_mb_pa_callback(struct rcu_head *head)
@@ -4464,7 +4502,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4464 trace_ext4_request_blocks(ar); 4502 trace_ext4_request_blocks(ar);
4465 4503
4466 /* Allow to use superuser reservation for quota file */ 4504 /* Allow to use superuser reservation for quota file */
4467 if (IS_NOQUOTA(ar->inode)) 4505 if (ext4_is_quota_file(ar->inode))
4468 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; 4506 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4469 4507
4470 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { 4508 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
@@ -4583,14 +4621,28 @@ out:
4583 * are contiguous, AND the extents were freed by the same transaction, 4621 * are contiguous, AND the extents were freed by the same transaction,
4584 * AND the blocks are associated with the same group. 4622 * AND the blocks are associated with the same group.
4585 */ 4623 */
4586static int can_merge(struct ext4_free_data *entry1, 4624static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
4587 struct ext4_free_data *entry2) 4625 struct ext4_free_data *entry,
4626 struct ext4_free_data *new_entry,
4627 struct rb_root *entry_rb_root)
4588{ 4628{
4589 if ((entry1->efd_tid == entry2->efd_tid) && 4629 if ((entry->efd_tid != new_entry->efd_tid) ||
4590 (entry1->efd_group == entry2->efd_group) && 4630 (entry->efd_group != new_entry->efd_group))
4591 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) 4631 return;
4592 return 1; 4632 if (entry->efd_start_cluster + entry->efd_count ==
4593 return 0; 4633 new_entry->efd_start_cluster) {
4634 new_entry->efd_start_cluster = entry->efd_start_cluster;
4635 new_entry->efd_count += entry->efd_count;
4636 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
4637 entry->efd_start_cluster) {
4638 new_entry->efd_count += entry->efd_count;
4639 } else
4640 return;
4641 spin_lock(&sbi->s_md_lock);
4642 list_del(&entry->efd_list);
4643 spin_unlock(&sbi->s_md_lock);
4644 rb_erase(&entry->efd_node, entry_rb_root);
4645 kmem_cache_free(ext4_free_data_cachep, entry);
4594} 4646}
4595 4647
4596static noinline_for_stack int 4648static noinline_for_stack int
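ext4_try_merge_freed_extent() above folds the old can_merge() test and the merge itself into one helper: a freed extent absorbs a neighbour only when both were freed in the same transaction, belong to the same block group, and their cluster ranges touch end to end in either direction; the absorbed entry is then unlinked from the per-commit list and the rb-tree. The range arithmetic in isolation, using a hypothetical struct freed_range, looks like this:

#include <stdio.h>
#include <stdbool.h>

struct freed_range {
    unsigned int tid, group, start, count;
};

/* Try to fold 'old' into 'new'; returns true if 'new' absorbed it. */
static bool try_merge(struct freed_range *new, const struct freed_range *old)
{
    if (old->tid != new->tid || old->group != new->group)
        return false;                      /* different commit or group */
    if (old->start + old->count == new->start) {
        new->start = old->start;           /* old range sits just below */
        new->count += old->count;
        return true;
    }
    if (new->start + new->count == old->start) {
        new->count += old->count;          /* old range sits just above */
        return true;
    }
    return false;                          /* not adjacent */
}

int main(void)
{
    struct freed_range new = { 10, 3, 40, 8 };
    struct freed_range left = { 10, 3, 32, 8 };

    if (try_merge(&new, &left))
        printf("merged: start=%u count=%u\n", new.start, new.count);
    return 0;
}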
@@ -4646,29 +4698,19 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4646 node = rb_prev(new_node); 4698 node = rb_prev(new_node);
4647 if (node) { 4699 if (node) {
4648 entry = rb_entry(node, struct ext4_free_data, efd_node); 4700 entry = rb_entry(node, struct ext4_free_data, efd_node);
4649 if (can_merge(entry, new_entry) && 4701 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4650 ext4_journal_callback_try_del(handle, &entry->efd_jce)) { 4702 &(db->bb_free_root));
4651 new_entry->efd_start_cluster = entry->efd_start_cluster;
4652 new_entry->efd_count += entry->efd_count;
4653 rb_erase(node, &(db->bb_free_root));
4654 kmem_cache_free(ext4_free_data_cachep, entry);
4655 }
4656 } 4703 }
4657 4704
4658 node = rb_next(new_node); 4705 node = rb_next(new_node);
4659 if (node) { 4706 if (node) {
4660 entry = rb_entry(node, struct ext4_free_data, efd_node); 4707 entry = rb_entry(node, struct ext4_free_data, efd_node);
4661 if (can_merge(new_entry, entry) && 4708 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4662 ext4_journal_callback_try_del(handle, &entry->efd_jce)) { 4709 &(db->bb_free_root));
4663 new_entry->efd_count += entry->efd_count;
4664 rb_erase(node, &(db->bb_free_root));
4665 kmem_cache_free(ext4_free_data_cachep, entry);
4666 }
4667 } 4710 }
4668 /* Add the extent to transaction's private list */ 4711
4669 new_entry->efd_jce.jce_func = ext4_free_data_callback;
4670 spin_lock(&sbi->s_md_lock); 4712 spin_lock(&sbi->s_md_lock);
4671 _ext4_journal_callback_add(handle, &new_entry->efd_jce); 4713 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
4672 sbi->s_mb_free_pending += clusters; 4714 sbi->s_mb_free_pending += clusters;
4673 spin_unlock(&sbi->s_md_lock); 4715 spin_unlock(&sbi->s_md_lock);
4674 return 0; 4716 return 0;
@@ -4871,7 +4913,8 @@ do_more:
4871 * them with group lock_held 4913 * them with group lock_held
4872 */ 4914 */
4873 if (test_opt(sb, DISCARD)) { 4915 if (test_opt(sb, DISCARD)) {
4874 err = ext4_issue_discard(sb, block_group, bit, count); 4916 err = ext4_issue_discard(sb, block_group, bit, count,
4917 NULL);
4875 if (err && err != -EOPNOTSUPP) 4918 if (err && err != -EOPNOTSUPP)
4876 ext4_msg(sb, KERN_WARNING, "discard request in" 4919 ext4_msg(sb, KERN_WARNING, "discard request in"
4877 " group:%d block:%d count:%lu failed" 4920 " group:%d block:%d count:%lu failed"
@@ -5094,7 +5137,7 @@ __acquires(bitlock)
5094 */ 5137 */
5095 mb_mark_used(e4b, &ex); 5138 mb_mark_used(e4b, &ex);
5096 ext4_unlock_group(sb, group); 5139 ext4_unlock_group(sb, group);
5097 ret = ext4_issue_discard(sb, group, start, count); 5140 ret = ext4_issue_discard(sb, group, start, count, NULL);
5098 ext4_lock_group(sb, group); 5141 ext4_lock_group(sb, group);
5099 mb_free_blocks(NULL, e4b, start, ex.fe_len); 5142 mb_free_blocks(NULL, e4b, start, ex.fe_len);
5100 return ret; 5143 return ret;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 2bed62084a8c..009300ee1561 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -78,10 +78,8 @@ do { \
78 78
79 79
80struct ext4_free_data { 80struct ext4_free_data {
81 /* MUST be the first member */ 81 /* this links the free block information from sb_info */
82 struct ext4_journal_cb_entry efd_jce; 82 struct list_head efd_list;
83
84 /* ext4_free_data private data starts from here */
85 83
86 /* this links the free block information from group_info */ 84 /* this links the free block information from group_info */
87 struct rb_node efd_node; 85 struct rb_node efd_node;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 364ea4d4a943..cf5181b62df1 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
475 owner[0] = i_uid_read(inode); 475 owner[0] = i_uid_read(inode);
476 owner[1] = i_gid_read(inode); 476 owner[1] = i_gid_read(inode);
477 tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root), 477 tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root),
478 S_IFREG, NULL, goal, owner); 478 S_IFREG, NULL, goal, owner, 0);
479 if (IS_ERR(tmp_inode)) { 479 if (IS_ERR(tmp_inode)) {
480 retval = PTR_ERR(tmp_inode); 480 retval = PTR_ERR(tmp_inode);
481 ext4_journal_stop(handle); 481 ext4_journal_stop(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c992ef2c2f94..9bb36909ec92 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -484,7 +484,7 @@ mext_check_arguments(struct inode *orig_inode,
484 return -EBUSY; 484 return -EBUSY;
485 } 485 }
486 486
487 if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) { 487 if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
488 ext4_debug("ext4 move extent: The argument files should " 488 ext4_debug("ext4 move extent: The argument files should "
489 "not be quota files [ino:orig %lu, donor %lu]\n", 489 "not be quota files [ino:orig %lu, donor %lu]\n",
490 orig_inode->i_ino, donor_inode->i_ino); 490 orig_inode->i_ino, donor_inode->i_ino);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 404256caf9cf..13f0cadb1238 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -513,7 +513,7 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
513 513
514static inline ext4_lblk_t dx_get_block(struct dx_entry *entry) 514static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
515{ 515{
516 return le32_to_cpu(entry->block) & 0x00ffffff; 516 return le32_to_cpu(entry->block) & 0x0fffffff;
517} 517}
518 518
519static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) 519static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
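The dx_get_block() mask change is part of the largedir support: an htree index entry keeps the logical block number of the next directory block in the low bits of a 32-bit field, and widening the mask from 0x00ffffff to 0x0fffffff raises the addressable range from 2^24 to 2^28 directory blocks. A quick arithmetic check (the 4 KiB block size is an assumption for the example, not read from a real filesystem):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    const uint32_t old_mask = 0x00ffffff;   /* 24 usable bits */
    const uint32_t new_mask = 0x0fffffff;   /* 28 usable bits */
    const uint64_t block_size = 4096;       /* assumed block size */

    printf("old: %u blocks (%llu GiB of directory blocks)\n",
           (unsigned)(old_mask + 1),
           (unsigned long long)((old_mask + 1ULL) * block_size >> 30));
    printf("new: %u blocks (%llu GiB of directory blocks)\n",
           (unsigned)(new_mask + 1),
           (unsigned long long)((new_mask + 1ULL) * block_size >> 30));
    return 0;
}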
@@ -739,6 +739,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
739 struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); 739 struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
740 u32 hash; 740 u32 hash;
741 741
742 memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
742 frame->bh = ext4_read_dirblock(dir, 0, INDEX); 743 frame->bh = ext4_read_dirblock(dir, 0, INDEX);
743 if (IS_ERR(frame->bh)) 744 if (IS_ERR(frame->bh))
744 return (struct dx_frame *) frame->bh; 745 return (struct dx_frame *) frame->bh;
@@ -768,9 +769,15 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
768 } 769 }
769 770
770 indirect = root->info.indirect_levels; 771 indirect = root->info.indirect_levels;
771 if (indirect > 1) { 772 if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
772 ext4_warning_inode(dir, "Unimplemented hash depth: %#06x", 773 ext4_warning(dir->i_sb,
773 root->info.indirect_levels); 774 "Directory (ino: %lu) htree depth %#06x exceed"
775 "supported value", dir->i_ino,
776 ext4_dir_htree_level(dir->i_sb));
777 if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
778 ext4_warning(dir->i_sb, "Enable large directory "
779 "feature to access it");
780 }
774 goto fail; 781 goto fail;
775 } 782 }
776 783
@@ -859,12 +866,19 @@ fail:
859 866
860static void dx_release(struct dx_frame *frames) 867static void dx_release(struct dx_frame *frames)
861{ 868{
869 struct dx_root_info *info;
870 int i;
871
862 if (frames[0].bh == NULL) 872 if (frames[0].bh == NULL)
863 return; 873 return;
864 874
865 if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels) 875 info = &((struct dx_root *)frames[0].bh->b_data)->info;
866 brelse(frames[1].bh); 876 for (i = 0; i <= info->indirect_levels; i++) {
867 brelse(frames[0].bh); 877 if (frames[i].bh == NULL)
878 break;
879 brelse(frames[i].bh);
880 frames[i].bh = NULL;
881 }
868} 882}
869 883
870/* 884/*
@@ -1050,7 +1064,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1050{ 1064{
1051 struct dx_hash_info hinfo; 1065 struct dx_hash_info hinfo;
1052 struct ext4_dir_entry_2 *de; 1066 struct ext4_dir_entry_2 *de;
1053 struct dx_frame frames[2], *frame; 1067 struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1054 struct inode *dir; 1068 struct inode *dir;
1055 ext4_lblk_t block; 1069 ext4_lblk_t block;
1056 int count = 0; 1070 int count = 0;
@@ -1428,11 +1442,11 @@ restart:
1428 goto next; 1442 goto next;
1429 wait_on_buffer(bh); 1443 wait_on_buffer(bh);
1430 if (!buffer_uptodate(bh)) { 1444 if (!buffer_uptodate(bh)) {
1431 /* read error, skip block & hope for the best */
1432 EXT4_ERROR_INODE(dir, "reading directory lblock %lu", 1445 EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
1433 (unsigned long) block); 1446 (unsigned long) block);
1434 brelse(bh); 1447 brelse(bh);
1435 goto next; 1448 ret = ERR_PTR(-EIO);
1449 goto cleanup_and_exit;
1436 } 1450 }
1437 if (!buffer_verified(bh) && 1451 if (!buffer_verified(bh) &&
1438 !is_dx_internal_node(dir, block, 1452 !is_dx_internal_node(dir, block,
@@ -1442,7 +1456,8 @@ restart:
1442 EXT4_ERROR_INODE(dir, "checksumming directory " 1456 EXT4_ERROR_INODE(dir, "checksumming directory "
1443 "block %lu", (unsigned long)block); 1457 "block %lu", (unsigned long)block);
1444 brelse(bh); 1458 brelse(bh);
1445 goto next; 1459 ret = ERR_PTR(-EFSBADCRC);
1460 goto cleanup_and_exit;
1446 } 1461 }
1447 set_buffer_verified(bh); 1462 set_buffer_verified(bh);
1448 i = search_dirblock(bh, dir, &fname, 1463 i = search_dirblock(bh, dir, &fname,
@@ -1485,7 +1500,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
1485 struct ext4_dir_entry_2 **res_dir) 1500 struct ext4_dir_entry_2 **res_dir)
1486{ 1501{
1487 struct super_block * sb = dir->i_sb; 1502 struct super_block * sb = dir->i_sb;
1488 struct dx_frame frames[2], *frame; 1503 struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1489 struct buffer_head *bh; 1504 struct buffer_head *bh;
1490 ext4_lblk_t block; 1505 ext4_lblk_t block;
1491 int retval; 1506 int retval;
@@ -1889,7 +1904,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
1889 */ 1904 */
1890 dir->i_mtime = dir->i_ctime = current_time(dir); 1905 dir->i_mtime = dir->i_ctime = current_time(dir);
1891 ext4_update_dx_flag(dir); 1906 ext4_update_dx_flag(dir);
1892 dir->i_version++; 1907 inode_inc_iversion(dir);
1893 ext4_mark_inode_dirty(handle, dir); 1908 ext4_mark_inode_dirty(handle, dir);
1894 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 1909 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1895 err = ext4_handle_dirty_dirent_node(handle, dir, bh); 1910 err = ext4_handle_dirty_dirent_node(handle, dir, bh);
@@ -1908,7 +1923,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
1908{ 1923{
1909 struct buffer_head *bh2; 1924 struct buffer_head *bh2;
1910 struct dx_root *root; 1925 struct dx_root *root;
1911 struct dx_frame frames[2], *frame; 1926 struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1912 struct dx_entry *entries; 1927 struct dx_entry *entries;
1913 struct ext4_dir_entry_2 *de, *de2; 1928 struct ext4_dir_entry_2 *de, *de2;
1914 struct ext4_dir_entry_tail *t; 1929 struct ext4_dir_entry_tail *t;
@@ -2127,13 +2142,16 @@ out:
2127static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, 2142static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2128 struct inode *dir, struct inode *inode) 2143 struct inode *dir, struct inode *inode)
2129{ 2144{
2130 struct dx_frame frames[2], *frame; 2145 struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2131 struct dx_entry *entries, *at; 2146 struct dx_entry *entries, *at;
2132 struct buffer_head *bh; 2147 struct buffer_head *bh;
2133 struct super_block *sb = dir->i_sb; 2148 struct super_block *sb = dir->i_sb;
2134 struct ext4_dir_entry_2 *de; 2149 struct ext4_dir_entry_2 *de;
2150 int restart;
2135 int err; 2151 int err;
2136 2152
2153again:
2154 restart = 0;
2137 frame = dx_probe(fname, dir, NULL, frames); 2155 frame = dx_probe(fname, dir, NULL, frames);
2138 if (IS_ERR(frame)) 2156 if (IS_ERR(frame))
2139 return PTR_ERR(frame); 2157 return PTR_ERR(frame);
@@ -2155,24 +2173,44 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2155 if (err != -ENOSPC) 2173 if (err != -ENOSPC)
2156 goto cleanup; 2174 goto cleanup;
2157 2175
2176 err = 0;
2158 /* Block full, should compress but for now just split */ 2177 /* Block full, should compress but for now just split */
2159 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", 2178 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
2160 dx_get_count(entries), dx_get_limit(entries))); 2179 dx_get_count(entries), dx_get_limit(entries)));
2161 /* Need to split index? */ 2180 /* Need to split index? */
2162 if (dx_get_count(entries) == dx_get_limit(entries)) { 2181 if (dx_get_count(entries) == dx_get_limit(entries)) {
2163 ext4_lblk_t newblock; 2182 ext4_lblk_t newblock;
2164 unsigned icount = dx_get_count(entries); 2183 int levels = frame - frames + 1;
2165 int levels = frame - frames; 2184 unsigned int icount;
2185 int add_level = 1;
2166 struct dx_entry *entries2; 2186 struct dx_entry *entries2;
2167 struct dx_node *node2; 2187 struct dx_node *node2;
2168 struct buffer_head *bh2; 2188 struct buffer_head *bh2;
2169 2189
2170 if (levels && (dx_get_count(frames->entries) == 2190 while (frame > frames) {
2171 dx_get_limit(frames->entries))) { 2191 if (dx_get_count((frame - 1)->entries) <
2172 ext4_warning_inode(dir, "Directory index full!"); 2192 dx_get_limit((frame - 1)->entries)) {
2193 add_level = 0;
2194 break;
2195 }
2196 frame--; /* split higher index block */
2197 at = frame->at;
2198 entries = frame->entries;
2199 restart = 1;
2200 }
2201 if (add_level && levels == ext4_dir_htree_level(sb)) {
2202 ext4_warning(sb, "Directory (ino: %lu) index full, "
2203 "reach max htree level :%d",
2204 dir->i_ino, levels);
2205 if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
2206 ext4_warning(sb, "Large directory feature is "
2207 "not enabled on this "
2208 "filesystem");
2209 }
2173 err = -ENOSPC; 2210 err = -ENOSPC;
2174 goto cleanup; 2211 goto cleanup;
2175 } 2212 }
2213 icount = dx_get_count(entries);
2176 bh2 = ext4_append(handle, dir, &newblock); 2214 bh2 = ext4_append(handle, dir, &newblock);
2177 if (IS_ERR(bh2)) { 2215 if (IS_ERR(bh2)) {
2178 err = PTR_ERR(bh2); 2216 err = PTR_ERR(bh2);
@@ -2187,7 +2225,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2187 err = ext4_journal_get_write_access(handle, frame->bh); 2225 err = ext4_journal_get_write_access(handle, frame->bh);
2188 if (err) 2226 if (err)
2189 goto journal_error; 2227 goto journal_error;
2190 if (levels) { 2228 if (!add_level) {
2191 unsigned icount1 = icount/2, icount2 = icount - icount1; 2229 unsigned icount1 = icount/2, icount2 = icount - icount1;
2192 unsigned hash2 = dx_get_hash(entries + icount1); 2230 unsigned hash2 = dx_get_hash(entries + icount1);
2193 dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", 2231 dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
@@ -2195,7 +2233,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2195 2233
2196 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ 2234 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2197 err = ext4_journal_get_write_access(handle, 2235 err = ext4_journal_get_write_access(handle,
2198 frames[0].bh); 2236 (frame - 1)->bh);
2199 if (err) 2237 if (err)
2200 goto journal_error; 2238 goto journal_error;
2201 2239
@@ -2211,17 +2249,25 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2211 frame->entries = entries = entries2; 2249 frame->entries = entries = entries2;
2212 swap(frame->bh, bh2); 2250 swap(frame->bh, bh2);
2213 } 2251 }
2214 dx_insert_block(frames + 0, hash2, newblock); 2252 dx_insert_block((frame - 1), hash2, newblock);
2215 dxtrace(dx_show_index("node", frames[1].entries)); 2253 dxtrace(dx_show_index("node", frame->entries));
2216 dxtrace(dx_show_index("node", 2254 dxtrace(dx_show_index("node",
2217 ((struct dx_node *) bh2->b_data)->entries)); 2255 ((struct dx_node *) bh2->b_data)->entries));
2218 err = ext4_handle_dirty_dx_node(handle, dir, bh2); 2256 err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2219 if (err) 2257 if (err)
2220 goto journal_error; 2258 goto journal_error;
2221 brelse (bh2); 2259 brelse (bh2);
2260 err = ext4_handle_dirty_dx_node(handle, dir,
2261 (frame - 1)->bh);
2262 if (err)
2263 goto journal_error;
2264 if (restart) {
2265 err = ext4_handle_dirty_dx_node(handle, dir,
2266 frame->bh);
2267 goto journal_error;
2268 }
2222 } else { 2269 } else {
2223 dxtrace(printk(KERN_DEBUG 2270 struct dx_root *dxroot;
2224 "Creating second level index...\n"));
2225 memcpy((char *) entries2, (char *) entries, 2271 memcpy((char *) entries2, (char *) entries,
2226 icount * sizeof(struct dx_entry)); 2272 icount * sizeof(struct dx_entry));
2227 dx_set_limit(entries2, dx_node_limit(dir)); 2273 dx_set_limit(entries2, dx_node_limit(dir));
@@ -2229,22 +2275,18 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2229 /* Set up root */ 2275 /* Set up root */
2230 dx_set_count(entries, 1); 2276 dx_set_count(entries, 1);
2231 dx_set_block(entries + 0, newblock); 2277 dx_set_block(entries + 0, newblock);
2232 ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; 2278 dxroot = (struct dx_root *)frames[0].bh->b_data;
2233 2279 dxroot->info.indirect_levels += 1;
2234 /* Add new access path frame */ 2280 dxtrace(printk(KERN_DEBUG
2235 frame = frames + 1; 2281 "Creating %d level index...\n",
2236 frame->at = at = at - entries + entries2; 2282 info->indirect_levels));
2237 frame->entries = entries = entries2; 2283 err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2238 frame->bh = bh2;
2239 err = ext4_journal_get_write_access(handle,
2240 frame->bh);
2241 if (err) 2284 if (err)
2242 goto journal_error; 2285 goto journal_error;
2243 } 2286 err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2244 err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); 2287 brelse(bh2);
2245 if (err) { 2288 restart = 1;
2246 ext4_std_error(inode->i_sb, err); 2289 goto journal_error;
2247 goto cleanup;
2248 } 2290 }
2249 } 2291 }
2250 de = do_split(handle, dir, &bh, frame, &fname->hinfo); 2292 de = do_split(handle, dir, &bh, frame, &fname->hinfo);
@@ -2256,10 +2298,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2256 goto cleanup; 2298 goto cleanup;
2257 2299
2258journal_error: 2300journal_error:
2259 ext4_std_error(dir->i_sb, err); 2301 ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
2260cleanup: 2302cleanup:
2261 brelse(bh); 2303 brelse(bh);
2262 dx_release(frames); 2304 dx_release(frames);
2305 /* @restart is true means htree-path has been changed, we need to
2306 * repeat dx_probe() to find out valid htree-path
2307 */
2308 if (restart && err == 0)
2309 goto again;
2263 return err; 2310 return err;
2264} 2311}
2265 2312
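The reworked ext4_dx_add_entry() above no longer assumes a two-level tree. When the index block on the probed path is full, it walks back up the saved frames until it finds a parent with spare room and splits at that point; only if every block up to the root is full does it add a new level (add_level), and if the tree is already at ext4_dir_htree_level() it gives up with -ENOSPC. Any split above the leaf level sets restart, so the function re-runs dx_probe() once the dirty index blocks have been written into the journal. The level-selection decision can be modelled on its own roughly like this (an array of count/limit pairs stands in for the kernel's dx_frame stack, and 508 is just an illustrative fan-out):

#include <stdio.h>

/* One htree level: how many index entries are in use vs. the maximum. */
struct level { unsigned int count, limit; };

/*
 * Walk from the deepest level toward the root and report the first level
 * whose parent still has room; if even the root is full, a new level must
 * be added, unless the tree is already at its maximum depth.
 */
static int pick_split_level(const struct level *lv, int depth, int max_depth,
                            int *add_level)
{
    int i;

    *add_level = 1;
    for (i = depth - 1; i > 0; i--) {
        if (lv[i - 1].count < lv[i - 1].limit) {
            *add_level = 0;       /* parent has room: split at level i */
            return i;
        }
    }
    if (depth == max_depth)
        return -1;                /* tree is full: -ENOSPC */
    return 0;                     /* grow the tree at the root */
}

int main(void)
{
    /* root has room, the two levels below it are full */
    struct level lv[3] = { {300, 508}, {508, 508}, {508, 508} };
    int add_level, at = pick_split_level(lv, 3, 3, &add_level);

    if (at < 0)
        printf("directory index full\n");
    else
        printf("split at level %d, add_level=%d\n", at, add_level);
    return 0;
}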
@@ -2296,7 +2343,7 @@ int ext4_generic_delete_entry(handle_t *handle,
2296 blocksize); 2343 blocksize);
2297 else 2344 else
2298 de->inode = 0; 2345 de->inode = 0;
2299 dir->i_version++; 2346 inode_inc_iversion(dir);
2300 return 0; 2347 return 0;
2301 } 2348 }
2302 i += ext4_rec_len_from_disk(de->rec_len, blocksize); 2349 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 18f68f09d393..0886fe82e9c4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -373,6 +373,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
373 struct ext4_journal_cb_entry *jce; 373 struct ext4_journal_cb_entry *jce;
374 374
375 BUG_ON(txn->t_state == T_FINISHED); 375 BUG_ON(txn->t_state == T_FINISHED);
376
377 ext4_process_freed_data(sb, txn->t_tid);
378
376 spin_lock(&sbi->s_md_lock); 379 spin_lock(&sbi->s_md_lock);
377 while (!list_empty(&txn->t_private_list)) { 380 while (!list_empty(&txn->t_private_list)) {
378 jce = list_entry(txn->t_private_list.next, 381 jce = list_entry(txn->t_private_list.next,
@@ -927,9 +930,13 @@ static void ext4_put_super(struct super_block *sb)
927 invalidate_bdev(sbi->journal_bdev); 930 invalidate_bdev(sbi->journal_bdev);
928 ext4_blkdev_remove(sbi); 931 ext4_blkdev_remove(sbi);
929 } 932 }
930 if (sbi->s_mb_cache) { 933 if (sbi->s_ea_inode_cache) {
931 ext4_xattr_destroy_cache(sbi->s_mb_cache); 934 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
932 sbi->s_mb_cache = NULL; 935 sbi->s_ea_inode_cache = NULL;
936 }
937 if (sbi->s_ea_block_cache) {
938 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
939 sbi->s_ea_block_cache = NULL;
933 } 940 }
934 if (sbi->s_mmp_tsk) 941 if (sbi->s_mmp_tsk)
935 kthread_stop(sbi->s_mmp_tsk); 942 kthread_stop(sbi->s_mmp_tsk);
@@ -1143,7 +1150,16 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1143 void *fs_data) 1150 void *fs_data)
1144{ 1151{
1145 handle_t *handle = fs_data; 1152 handle_t *handle = fs_data;
1146 int res, res2, retries = 0; 1153 int res, res2, credits, retries = 0;
1154
1155 /*
1156 * Encrypting the root directory is not allowed because e2fsck expects
1157 * lost+found to exist and be unencrypted, and encrypting the root
1158 * directory would imply encrypting the lost+found directory as well as
1159 * the filename "lost+found" itself.
1160 */
1161 if (inode->i_ino == EXT4_ROOT_INO)
1162 return -EPERM;
1147 1163
1148 res = ext4_convert_inline_data(inode); 1164 res = ext4_convert_inline_data(inode);
1149 if (res) 1165 if (res)
@@ -1178,8 +1194,12 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1178 if (res) 1194 if (res)
1179 return res; 1195 return res;
1180retry: 1196retry:
1181 handle = ext4_journal_start(inode, EXT4_HT_MISC, 1197 res = ext4_xattr_set_credits(inode, len, false /* is_create */,
1182 ext4_jbd2_credits_xattr(inode)); 1198 &credits);
1199 if (res)
1200 return res;
1201
1202 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
1183 if (IS_ERR(handle)) 1203 if (IS_ERR(handle))
1184 return PTR_ERR(handle); 1204 return PTR_ERR(handle);
1185 1205
@@ -1256,16 +1276,17 @@ static struct dquot **ext4_get_dquots(struct inode *inode)
1256} 1276}
1257 1277
1258static const struct dquot_operations ext4_quota_operations = { 1278static const struct dquot_operations ext4_quota_operations = {
1259 .get_reserved_space = ext4_get_reserved_space, 1279 .get_reserved_space = ext4_get_reserved_space,
1260 .write_dquot = ext4_write_dquot, 1280 .write_dquot = ext4_write_dquot,
1261 .acquire_dquot = ext4_acquire_dquot, 1281 .acquire_dquot = ext4_acquire_dquot,
1262 .release_dquot = ext4_release_dquot, 1282 .release_dquot = ext4_release_dquot,
1263 .mark_dirty = ext4_mark_dquot_dirty, 1283 .mark_dirty = ext4_mark_dquot_dirty,
1264 .write_info = ext4_write_info, 1284 .write_info = ext4_write_info,
1265 .alloc_dquot = dquot_alloc, 1285 .alloc_dquot = dquot_alloc,
1266 .destroy_dquot = dquot_destroy, 1286 .destroy_dquot = dquot_destroy,
1267 .get_projid = ext4_get_projid, 1287 .get_projid = ext4_get_projid,
1268 .get_next_id = ext4_get_next_id, 1288 .get_inode_usage = ext4_get_inode_usage,
1289 .get_next_id = ext4_get_next_id,
1269}; 1290};
1270 1291
1271static const struct quotactl_ops ext4_qctl_operations = { 1292static const struct quotactl_ops ext4_qctl_operations = {
@@ -1328,7 +1349,7 @@ enum {
1328 Opt_inode_readahead_blks, Opt_journal_ioprio, 1349 Opt_inode_readahead_blks, Opt_journal_ioprio,
1329 Opt_dioread_nolock, Opt_dioread_lock, 1350 Opt_dioread_nolock, Opt_dioread_lock,
1330 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 1351 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1331 Opt_max_dir_size_kb, Opt_nojournal_checksum, 1352 Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
1332}; 1353};
1333 1354
1334static const match_table_t tokens = { 1355static const match_table_t tokens = {
@@ -1411,6 +1432,8 @@ static const match_table_t tokens = {
1411 {Opt_noinit_itable, "noinit_itable"}, 1432 {Opt_noinit_itable, "noinit_itable"},
1412 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, 1433 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1413 {Opt_test_dummy_encryption, "test_dummy_encryption"}, 1434 {Opt_test_dummy_encryption, "test_dummy_encryption"},
1435 {Opt_nombcache, "nombcache"},
1436 {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
1414 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1437 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1415 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1438 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1416 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1439 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1618,6 +1641,7 @@ static const struct mount_opts {
1618 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1641 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1619 {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 1642 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1620 {Opt_test_dummy_encryption, 0, MOPT_GTE0}, 1643 {Opt_test_dummy_encryption, 0, MOPT_GTE0},
1644 {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1621 {Opt_err, 0, 0} 1645 {Opt_err, 0, 0}
1622}; 1646};
1623 1647
@@ -3445,7 +3469,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3445 } 3469 }
3446 3470
3447 /* Load the checksum driver */ 3471 /* Load the checksum driver */
3448 if (ext4_has_feature_metadata_csum(sb)) { 3472 if (ext4_has_feature_metadata_csum(sb) ||
3473 ext4_has_feature_ea_inode(sb)) {
3449 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3474 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3450 if (IS_ERR(sbi->s_chksum_driver)) { 3475 if (IS_ERR(sbi->s_chksum_driver)) {
3451 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 3476 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
@@ -3467,7 +3492,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3467 /* Precompute checksum seed for all metadata */ 3492 /* Precompute checksum seed for all metadata */
3468 if (ext4_has_feature_csum_seed(sb)) 3493 if (ext4_has_feature_csum_seed(sb))
3469 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); 3494 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3470 else if (ext4_has_metadata_csum(sb)) 3495 else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
3471 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3496 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3472 sizeof(es->s_uuid)); 3497 sizeof(es->s_uuid));
3473 3498
@@ -3597,6 +3622,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3597 "The Hurd can't support 64-bit file systems"); 3622 "The Hurd can't support 64-bit file systems");
3598 goto failed_mount; 3623 goto failed_mount;
3599 } 3624 }
3625
3626 /*
3627 * ea_inode feature uses l_i_version field which is not
3628 * available in HURD_COMPAT mode.
3629 */
3630 if (ext4_has_feature_ea_inode(sb)) {
3631 ext4_msg(sb, KERN_ERR,
3632 "ea_inode feature is not supported for Hurd");
3633 goto failed_mount;
3634 }
3600 } 3635 }
3601 3636
3602 if (IS_EXT2_SB(sb)) { 3637 if (IS_EXT2_SB(sb)) {
@@ -4061,10 +4096,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4061 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 4096 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4062 4097
4063no_journal: 4098no_journal:
4064 sbi->s_mb_cache = ext4_xattr_create_cache(); 4099 if (!test_opt(sb, NO_MBCACHE)) {
4065 if (!sbi->s_mb_cache) { 4100 sbi->s_ea_block_cache = ext4_xattr_create_cache();
4066 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 4101 if (!sbi->s_ea_block_cache) {
4067 goto failed_mount_wq; 4102 ext4_msg(sb, KERN_ERR,
4103 "Failed to create ea_block_cache");
4104 goto failed_mount_wq;
4105 }
4106
4107 if (ext4_has_feature_ea_inode(sb)) {
4108 sbi->s_ea_inode_cache = ext4_xattr_create_cache();
4109 if (!sbi->s_ea_inode_cache) {
4110 ext4_msg(sb, KERN_ERR,
4111 "Failed to create ea_inode_cache");
4112 goto failed_mount_wq;
4113 }
4114 }
4068 } 4115 }
4069 4116
4070 if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && 4117 if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4296,9 +4343,13 @@ failed_mount4:
4296 if (EXT4_SB(sb)->rsv_conversion_wq) 4343 if (EXT4_SB(sb)->rsv_conversion_wq)
4297 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4344 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4298failed_mount_wq: 4345failed_mount_wq:
4299 if (sbi->s_mb_cache) { 4346 if (sbi->s_ea_inode_cache) {
4300 ext4_xattr_destroy_cache(sbi->s_mb_cache); 4347 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
4301 sbi->s_mb_cache = NULL; 4348 sbi->s_ea_inode_cache = NULL;
4349 }
4350 if (sbi->s_ea_block_cache) {
4351 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
4352 sbi->s_ea_block_cache = NULL;
4302 } 4353 }
4303 if (sbi->s_journal) { 4354 if (sbi->s_journal) {
4304 jbd2_journal_destroy(sbi->s_journal); 4355 jbd2_journal_destroy(sbi->s_journal);
@@ -4957,6 +5008,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4957 } 5008 }
4958 } 5009 }
4959 5010
5011 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
5012 ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
5013 err = -EINVAL;
5014 goto restore_opts;
5015 }
5016
4960 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { 5017 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
4961 ext4_msg(sb, KERN_WARNING, "warning: refusing change of " 5018 ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
4962 "dax flag with busy inodes while remounting"); 5019 "dax flag with busy inodes while remounting");
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index d74dc5f81a04..48c7a7d55ed3 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
100 int ret; 100 int ret;
101 101
102 ret = kstrtoull(skip_spaces(buf), 0, &val); 102 ret = kstrtoull(skip_spaces(buf), 0, &val);
103 if (!ret || val >= clusters) 103 if (ret || val >= clusters)
104 return -EINVAL; 104 return -EINVAL;
105 105
106 atomic64_set(&sbi->s_resv_clusters, val); 106 atomic64_set(&sbi->s_resv_clusters, val);
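The sysfs.c one-liner is a logic inversion fix: kstrtoull() returns 0 on success and a negative errno on failure, so the old `if (!ret || val >= clusters)` rejected every successfully parsed value and a valid write to reserved_clusters could never take effect. The corrected test fails the store only when parsing failed or the value is out of range. The same pattern in plain C, with a strtoull()-based stand-in for kstrtoull() (parse_u64() and store_reserved() are hypothetical helpers for the example):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* 0 on success, negative errno on failure -- same contract as kstrtoull(). */
static int parse_u64(const char *s, unsigned long long *out)
{
    char *end;

    errno = 0;
    *out = strtoull(s, &end, 0);
    if (errno)
        return -ERANGE;
    if (end == s || *end != '\0')
        return -EINVAL;
    return 0;
}

static int store_reserved(const char *buf, unsigned long long clusters)
{
    unsigned long long val;
    int ret = parse_u64(buf, &val);

    if (ret || val >= clusters)     /* correct: reject only on error */
        return -EINVAL;
    printf("reserved clusters set to %llu\n", val);
    return 0;
}

int main(void)
{
    store_reserved("128", 1024);    /* accepted */
    store_reserved("junk", 1024);   /* rejected */
    return 0;
}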
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5d3c2536641c..cff4f41ced61 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -72,12 +72,14 @@
72# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 72# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
73#endif 73#endif
74 74
75static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); 75static void ext4_xattr_block_cache_insert(struct mb_cache *,
76static struct buffer_head *ext4_xattr_cache_find(struct inode *, 76 struct buffer_head *);
77 struct ext4_xattr_header *, 77static struct buffer_head *
78 struct mb_cache_entry **); 78ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
79static void ext4_xattr_rehash(struct ext4_xattr_header *, 79 struct mb_cache_entry **);
80 struct ext4_xattr_entry *); 80static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
81 size_t value_count);
82static void ext4_xattr_rehash(struct ext4_xattr_header *);
81 83
82static const struct xattr_handler * const ext4_xattr_handler_map[] = { 84static const struct xattr_handler * const ext4_xattr_handler_map[] = {
83 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, 85 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
@@ -104,8 +106,22 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
104 NULL 106 NULL
105}; 107};
106 108
107#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \ 109#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
108 inode->i_sb->s_fs_info)->s_mb_cache) 110 inode->i_sb->s_fs_info)->s_ea_block_cache)
111
112#define EA_INODE_CACHE(inode) (((struct ext4_sb_info *) \
113 inode->i_sb->s_fs_info)->s_ea_inode_cache)
114
115static int
116ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
117 struct inode *inode);
118
119#ifdef CONFIG_LOCKDEP
120void ext4_xattr_inode_set_class(struct inode *ea_inode)
121{
122 lockdep_set_subclass(&ea_inode->i_rwsem, 1);
123}
124#endif
109 125
110static __le32 ext4_xattr_block_csum(struct inode *inode, 126static __le32 ext4_xattr_block_csum(struct inode *inode,
111 sector_t block_nr, 127 sector_t block_nr,
@@ -177,9 +193,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
177 193
178 /* Check the values */ 194 /* Check the values */
179 while (!IS_LAST_ENTRY(entry)) { 195 while (!IS_LAST_ENTRY(entry)) {
180 if (entry->e_value_block != 0) 196 if (entry->e_value_size != 0 &&
181 return -EFSCORRUPTED; 197 entry->e_value_inum == 0) {
182 if (entry->e_value_size != 0) {
183 u16 offs = le16_to_cpu(entry->e_value_offs); 198 u16 offs = le16_to_cpu(entry->e_value_offs);
184 u32 size = le32_to_cpu(entry->e_value_size); 199 u32 size = le32_to_cpu(entry->e_value_size);
185 void *value; 200 void *value;
@@ -269,6 +284,185 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
269 return cmp ? -ENODATA : 0; 284 return cmp ? -ENODATA : 0;
270} 285}
271 286
287static u32
288ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
289{
290 return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
291}
292
293static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
294{
295 return ((u64)ea_inode->i_ctime.tv_sec << 32) |
296 ((u32)ea_inode->i_version);
297}
298
299static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
300{
301 ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
302 ea_inode->i_version = (u32)ref_count;
303}
304
305static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode)
306{
307 return (u32)ea_inode->i_atime.tv_sec;
308}
309
310static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
311{
312 ea_inode->i_atime.tv_sec = hash;
313}
314
315/*
316 * Read the EA value from an inode.
317 */
318static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
319{
320 unsigned long block = 0;
321 struct buffer_head *bh;
322 int blocksize = ea_inode->i_sb->s_blocksize;
323 size_t csize, copied = 0;
324 void *copy_pos = buf;
325
326 while (copied < size) {
327 csize = (size - copied) > blocksize ? blocksize : size - copied;
328 bh = ext4_bread(NULL, ea_inode, block, 0);
329 if (IS_ERR(bh))
330 return PTR_ERR(bh);
331 if (!bh)
332 return -EFSCORRUPTED;
333
334 memcpy(copy_pos, bh->b_data, csize);
335 brelse(bh);
336
337 copy_pos += csize;
338 block += 1;
339 copied += csize;
340 }
341 return 0;
342}
343
344static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
345 struct inode **ea_inode)
346{
347 struct inode *inode;
348 int err;
349
350 inode = ext4_iget(parent->i_sb, ea_ino);
351 if (IS_ERR(inode)) {
352 err = PTR_ERR(inode);
353 ext4_error(parent->i_sb,
354 "error while reading EA inode %lu err=%d", ea_ino,
355 err);
356 return err;
357 }
358
359 if (is_bad_inode(inode)) {
360 ext4_error(parent->i_sb,
361 "error while reading EA inode %lu is_bad_inode",
362 ea_ino);
363 err = -EIO;
364 goto error;
365 }
366
367 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
368 ext4_error(parent->i_sb,
369 "EA inode %lu does not have EXT4_EA_INODE_FL flag",
370 ea_ino);
371 err = -EINVAL;
372 goto error;
373 }
374
375 *ea_inode = inode;
376 return 0;
377error:
378 iput(inode);
379 return err;
380}
381
382static int
383ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
384 struct ext4_xattr_entry *entry, void *buffer,
385 size_t size)
386{
387 u32 hash;
388
389 /* Verify stored hash matches calculated hash. */
390 hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size);
391 if (hash != ext4_xattr_inode_get_hash(ea_inode))
392 return -EFSCORRUPTED;
393
394 if (entry) {
395 __le32 e_hash, tmp_data;
396
397 /* Verify entry hash. */
398 tmp_data = cpu_to_le32(hash);
399 e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len,
400 &tmp_data, 1);
401 if (e_hash != entry->e_hash)
402 return -EFSCORRUPTED;
403 }
404 return 0;
405}
406
407#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
408
409/*
410 * Read xattr value from the EA inode.
411 */
412static int
413ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
414 void *buffer, size_t size)
415{
416 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
417 struct inode *ea_inode;
418 int err;
419
420 err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
421 &ea_inode);
422 if (err) {
423 ea_inode = NULL;
424 goto out;
425 }
426
427 if (i_size_read(ea_inode) != size) {
428 ext4_warning_inode(ea_inode,
429 "ea_inode file size=%llu entry size=%zu",
430 i_size_read(ea_inode), size);
431 err = -EFSCORRUPTED;
432 goto out;
433 }
434
435 err = ext4_xattr_inode_read(ea_inode, buffer, size);
436 if (err)
437 goto out;
438
439 err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
440 /*
441 * Compatibility check for old Lustre ea_inode implementation. Old
442 * version does not have hash validation, but it has a backpointer
443 * from ea_inode to the parent inode.
444 */
445 if (err == -EFSCORRUPTED) {
446 if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
447 ea_inode->i_generation != inode->i_generation) {
448 ext4_warning_inode(ea_inode,
449 "EA inode hash validation failed");
450 goto out;
451 }
452 /* Do not add ea_inode to the cache. */
453 ea_inode_cache = NULL;
454 } else if (err)
455 goto out;
456
457 if (ea_inode_cache)
458 mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
459 ext4_xattr_inode_get_hash(ea_inode),
460 ea_inode->i_ino, true /* reusable */);
461out:
462 iput(ea_inode);
463 return err;
464}
465
272static int 466static int
273ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, 467ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
274 void *buffer, size_t buffer_size) 468 void *buffer, size_t buffer_size)
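A quirk of the ea_inode design shown in the hunk above: an EA inode is never visible in a directory, so its timestamp fields are repurposed as bookkeeping. The 64-bit reference count is split across i_ctime.tv_sec (upper half) and i_version (lower half), the checksum of the stored value is kept in i_atime.tv_sec, and the older Lustre layout's parent backpointer lives in i_mtime.tv_sec (the EXT4_XATTR_INODE_GET_PARENT macro). A userspace model of the pack/unpack arithmetic, with plain uint32_t fields standing in for the inode timestamps:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the inode fields the ea_inode code borrows. */
struct ea_inode_fields {
    uint32_t ctime_sec;   /* upper half of the reference count */
    uint32_t i_version;   /* lower half of the reference count */
    uint32_t atime_sec;   /* hash of the xattr value */
};

static void set_ref(struct ea_inode_fields *f, uint64_t ref)
{
    f->ctime_sec = (uint32_t)(ref >> 32);
    f->i_version = (uint32_t)ref;
}

static uint64_t get_ref(const struct ea_inode_fields *f)
{
    return ((uint64_t)f->ctime_sec << 32) | f->i_version;
}

int main(void)
{
    struct ea_inode_fields f = { 0, 0, 0 };

    set_ref(&f, 0x123456789ULL);          /* > 4G refs still round-trips */
    assert(get_ref(&f) == 0x123456789ULL);

    set_ref(&f, get_ref(&f) + 1);         /* the inc-ref path */
    printf("ref count now %llu\n", (unsigned long long)get_ref(&f));
    return 0;
}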
@@ -277,7 +471,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
277 struct ext4_xattr_entry *entry; 471 struct ext4_xattr_entry *entry;
278 size_t size; 472 size_t size;
279 int error; 473 int error;
280 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 474 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
281 475
282 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 476 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
283 name_index, name, buffer, (long)buffer_size); 477 name_index, name, buffer, (long)buffer_size);
@@ -298,7 +492,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
298 error = -EFSCORRUPTED; 492 error = -EFSCORRUPTED;
299 goto cleanup; 493 goto cleanup;
300 } 494 }
301 ext4_xattr_cache_insert(ext4_mb_cache, bh); 495 ext4_xattr_block_cache_insert(ea_block_cache, bh);
302 entry = BFIRST(bh); 496 entry = BFIRST(bh);
303 error = ext4_xattr_find_entry(&entry, name_index, name, 1); 497 error = ext4_xattr_find_entry(&entry, name_index, name, 1);
304 if (error) 498 if (error)
@@ -308,8 +502,15 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
308 error = -ERANGE; 502 error = -ERANGE;
309 if (size > buffer_size) 503 if (size > buffer_size)
310 goto cleanup; 504 goto cleanup;
311 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), 505 if (entry->e_value_inum) {
312 size); 506 error = ext4_xattr_inode_get(inode, entry, buffer,
507 size);
508 if (error)
509 goto cleanup;
510 } else {
511 memcpy(buffer, bh->b_data +
512 le16_to_cpu(entry->e_value_offs), size);
513 }
313 } 514 }
314 error = size; 515 error = size;
315 516
@@ -350,8 +551,15 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
350 error = -ERANGE; 551 error = -ERANGE;
351 if (size > buffer_size) 552 if (size > buffer_size)
352 goto cleanup; 553 goto cleanup;
353 memcpy(buffer, (void *)IFIRST(header) + 554 if (entry->e_value_inum) {
354 le16_to_cpu(entry->e_value_offs), size); 555 error = ext4_xattr_inode_get(inode, entry, buffer,
556 size);
557 if (error)
558 goto cleanup;
559 } else {
560 memcpy(buffer, (void *)IFIRST(header) +
561 le16_to_cpu(entry->e_value_offs), size);
562 }
355 } 563 }
356 error = size; 564 error = size;
357 565
@@ -428,7 +636,6 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
428 struct inode *inode = d_inode(dentry); 636 struct inode *inode = d_inode(dentry);
429 struct buffer_head *bh = NULL; 637 struct buffer_head *bh = NULL;
430 int error; 638 int error;
431 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
432 639
433 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 640 ea_idebug(inode, "buffer=%p, buffer_size=%ld",
434 buffer, (long)buffer_size); 641 buffer, (long)buffer_size);
@@ -450,7 +657,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
450 error = -EFSCORRUPTED; 657 error = -EFSCORRUPTED;
451 goto cleanup; 658 goto cleanup;
452 } 659 }
453 ext4_xattr_cache_insert(ext4_mb_cache, bh); 660 ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
454 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); 661 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
455 662
456cleanup: 663cleanup:
@@ -539,15 +746,445 @@ static void ext4_xattr_update_super_block(handle_t *handle,
539 } 746 }
540} 747}
541 748
749int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
750{
751 struct ext4_iloc iloc = { .bh = NULL };
752 struct buffer_head *bh = NULL;
753 struct ext4_inode *raw_inode;
754 struct ext4_xattr_ibody_header *header;
755 struct ext4_xattr_entry *entry;
756 qsize_t ea_inode_refs = 0;
757 void *end;
758 int ret;
759
760 lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem);
761
762 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
763 ret = ext4_get_inode_loc(inode, &iloc);
764 if (ret)
765 goto out;
766 raw_inode = ext4_raw_inode(&iloc);
767 header = IHDR(inode, raw_inode);
768 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
769 ret = xattr_check_inode(inode, header, end);
770 if (ret)
771 goto out;
772
773 for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
774 entry = EXT4_XATTR_NEXT(entry))
775 if (entry->e_value_inum)
776 ea_inode_refs++;
777 }
778
779 if (EXT4_I(inode)->i_file_acl) {
780 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
781 if (!bh) {
782 ret = -EIO;
783 goto out;
784 }
785
786 if (ext4_xattr_check_block(inode, bh)) {
787 ret = -EFSCORRUPTED;
788 goto out;
789 }
790
791 for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
792 entry = EXT4_XATTR_NEXT(entry))
793 if (entry->e_value_inum)
794 ea_inode_refs++;
795 }
796 *usage = ea_inode_refs + 1;
797 ret = 0;
798out:
799 brelse(iloc.bh);
800 brelse(bh);
801 return ret;
802}
803
804static inline size_t round_up_cluster(struct inode *inode, size_t length)
805{
806 struct super_block *sb = inode->i_sb;
807 size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits +
808 inode->i_blkbits);
809 size_t mask = ~(cluster_size - 1);
810
811 return (length + cluster_size - 1) & mask;
812}
813
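ext4_xattr_inode_alloc_quota() below charges the owner in whole clusters, so round_up_cluster() rounds the value length up to the cluster size derived from s_cluster_bits + i_blkbits. The rounding itself is ordinary power-of-two arithmetic; here it is in isolation, with an assumed 64 KiB cluster rather than a real superblock:

#include <stdio.h>
#include <stddef.h>

/* Round 'length' up to a multiple of 'cluster_size' (a power of two). */
static size_t round_up_cluster(size_t length, size_t cluster_size)
{
    size_t mask = ~(cluster_size - 1);

    return (length + cluster_size - 1) & mask;
}

int main(void)
{
    /* e.g. 4 KiB blocks with a 16-block cluster => 64 KiB clusters */
    size_t cluster = 64 * 1024;

    printf("%zu -> %zu\n", (size_t)100,   round_up_cluster(100, cluster));
    printf("%zu -> %zu\n", (size_t)65536, round_up_cluster(65536, cluster));
    return 0;
}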
814static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
815{
816 int err;
817
818 err = dquot_alloc_inode(inode);
819 if (err)
820 return err;
821 err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len));
822 if (err)
823 dquot_free_inode(inode);
824 return err;
825}
826
827static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
828{
829 dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
830 dquot_free_inode(inode);
831}
832
833int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
834 struct buffer_head *block_bh, size_t value_len,
835 bool is_create)
836{
837 int credits;
838 int blocks;
839
840 /*
841 * 1) Owner inode update
842 * 2) Ref count update on old xattr block
843 * 3) new xattr block
844 * 4) block bitmap update for new xattr block
845 * 5) group descriptor for new xattr block
846 * 6) block bitmap update for old xattr block
847 * 7) group descriptor for old block
848 *
849 * 6 & 7 can happen if we have two racing threads T_a and T_b
850 * which are each trying to set an xattr on inodes I_a and I_b
851 * which were both initially sharing an xattr block.
852 */
853 credits = 7;
854
855 /* Quota updates. */
856 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb);
857
858 /*
859 * In case of inline data, we may push out the data to a block,
860 * so we need to reserve credits for this eventuality
861 */
862 if (inode && ext4_has_inline_data(inode))
863 credits += ext4_writepage_trans_blocks(inode) + 1;
864
865 /* We are done if ea_inode feature is not enabled. */
866 if (!ext4_has_feature_ea_inode(sb))
867 return credits;
868
869 /* New ea_inode, inode map, block bitmap, group descriptor. */
870 credits += 4;
871
872 /* Data blocks. */
873 blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
874
875 /* Indirection block or one level of extent tree. */
876 blocks += 1;
877
878 /* Block bitmap and group descriptor updates for each block. */
879 credits += blocks * 2;
880
881 /* Blocks themselves. */
882 credits += blocks;
883
884 if (!is_create) {
885 /* Dereference ea_inode holding old xattr value.
886 * Old ea_inode, inode map, block bitmap, group descriptor.
887 */
888 credits += 4;
889
890 /* Data blocks for old ea_inode. */
891 blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
892
893 /* Indirection block or one level of extent tree for old
894 * ea_inode.
895 */
896 blocks += 1;
897
898 /* Block bitmap and group descriptor updates for each block. */
899 credits += blocks * 2;
900 }
901
902 /* We may need to clone the existing xattr block in which case we need
903 * to increment ref counts for existing ea_inodes referenced by it.
904 */
905 if (block_bh) {
906 struct ext4_xattr_entry *entry = BFIRST(block_bh);
907
908 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry))
909 if (entry->e_value_inum)
910 /* Ref count update on ea_inode. */
911 credits += 1;
912 }
913 return credits;
914}
915
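__ext4_xattr_set_credits() is an upper-bound estimate rather than an exact count: a fixed 7 credits for the inode and xattr-block bookkeeping, the quota transaction overhead, and, when ea_inode is enabled, the new EA inode's metadata, the value's data blocks plus one indirection block (each with its bitmap and group-descriptor update), the possible teardown of a previous EA inode sized at XATTR_SIZE_MAX, and one credit per ea_inode referenced from a shared xattr block that may have to be cloned. A toy version of the arithmetic for the EA-inode case (the inline-data and shared-block terms are ignored, and the quota cost is passed in as an assumed number in place of EXT4_MAXQUOTAS_TRANS_BLOCKS()):

#include <stdio.h>

#define XATTR_SIZE_MAX (64 * 1024)   /* VFS cap on one xattr value */

/*
 * Rough journal-credit estimate for storing one xattr value in an EA inode,
 * following the enumeration in the comment above.
 */
static int xattr_set_credits(unsigned long value_len, unsigned int blocksize,
                             int quota_credits, int is_create)
{
    int credits = 7 + quota_credits;          /* fixed cost + quota */
    int blocks = (value_len + blocksize - 1) / blocksize + 1; /* +1 index */

    credits += 4;                             /* new EA inode metadata */
    credits += blocks * 2 + blocks;           /* bitmap+desc, then the blocks */

    if (!is_create) {                         /* tearing down the old value */
        int old = XATTR_SIZE_MAX / blocksize + 1;

        credits += 4 + old * 2;
    }
    return credits;
}

int main(void)
{
    /* 10 KiB value, 4 KiB blocks, assuming 24 quota credits */
    printf("create:  %d credits\n", xattr_set_credits(10240, 4096, 24, 1));
    printf("replace: %d credits\n", xattr_set_credits(10240, 4096, 24, 0));
    return 0;
}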
916static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
917 int credits, struct buffer_head *bh,
918 bool dirty, bool block_csum)
919{
920 int error;
921
922 if (!ext4_handle_valid(handle))
923 return 0;
924
925 if (handle->h_buffer_credits >= credits)
926 return 0;
927
928 error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
929 if (!error)
930 return 0;
931 if (error < 0) {
932 ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
933 return error;
934 }
935
936 if (bh && dirty) {
937 if (block_csum)
938 ext4_xattr_block_csum_set(inode, bh);
939 error = ext4_handle_dirty_metadata(handle, NULL, bh);
940 if (error) {
941 ext4_warning(inode->i_sb, "Handle metadata (error %d)",
942 error);
943 return error;
944 }
945 }
946
947 error = ext4_journal_restart(handle, credits);
948 if (error) {
949 ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
950 return error;
951 }
952
953 if (bh) {
954 error = ext4_journal_get_write_access(handle, bh);
955 if (error) {
956 ext4_warning(inode->i_sb,
957 "Get write access failed (error %d)",
958 error);
959 return error;
960 }
961 }
962 return 0;
963}
964
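ext4_xattr_ensure_credits() above is the usual jbd2 pattern for loops that may touch many blocks: keep going while the handle still has credits, try ext4_journal_extend() when it runs low, and if the transaction is too full to extend, write out the dirty xattr block, restart the handle with a fresh budget and re-take write access. The control flow reduced to a toy credit counter (struct txn, struct handle and the 64-credit cap are invented for the sketch, not jbd2 types):

#include <stdio.h>

/* Toy model: a transaction can hand out at most 'cap' credits in total. */
struct txn { int given, cap; };
struct handle { struct txn *t; int credits; int restarts; };

static int try_extend(struct handle *h, int extra)
{
    if (h->t->given + extra > h->t->cap)
        return -1;                /* transaction full, cannot extend */
    h->t->given += extra;
    h->credits += extra;
    return 0;
}

static void restart(struct handle *h, int credits)
{
    h->t->given = credits;        /* fresh transaction budget */
    h->credits = credits;
    h->restarts++;
}

/* The ensure-credits idea: extend in place if we can, else restart. */
static void ensure_credits(struct handle *h, int needed)
{
    if (h->credits >= needed)
        return;
    if (try_extend(h, needed - h->credits) == 0)
        return;
    restart(h, needed);           /* after flushing dirty state */
}

int main(void)
{
    struct txn t = { 32, 64 };    /* 32 credits already used elsewhere */
    struct handle h = { &t, 8, 0 };

    for (int i = 0; i < 100; i++) {   /* e.g. one pass per ea_inode */
        ensure_credits(&h, 4);
        h.credits -= 4;               /* spend them on the actual updates */
    }
    printf("journal restarts: %d\n", h.restarts);
    return 0;
}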
965static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
966 int ref_change)
967{
968 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
969 struct ext4_iloc iloc;
970 s64 ref_count;
971 u32 hash;
972 int ret;
973
974 inode_lock(ea_inode);
975
976 ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
977 if (ret) {
978 iloc.bh = NULL;
979 goto out;
980 }
981
982 ref_count = ext4_xattr_inode_get_ref(ea_inode);
983 ref_count += ref_change;
984 ext4_xattr_inode_set_ref(ea_inode, ref_count);
985
986 if (ref_change > 0) {
987 WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
988 ea_inode->i_ino, ref_count);
989
990 if (ref_count == 1) {
991 WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
992 ea_inode->i_ino, ea_inode->i_nlink);
993
994 set_nlink(ea_inode, 1);
995 ext4_orphan_del(handle, ea_inode);
996
997 if (ea_inode_cache) {
998 hash = ext4_xattr_inode_get_hash(ea_inode);
999 mb_cache_entry_create(ea_inode_cache,
1000 GFP_NOFS, hash,
1001 ea_inode->i_ino,
1002 true /* reusable */);
1003 }
1004 }
1005 } else {
1006 WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
1007 ea_inode->i_ino, ref_count);
1008
1009 if (ref_count == 0) {
1010 WARN_ONCE(ea_inode->i_nlink != 1,
1011 "EA inode %lu i_nlink=%u",
1012 ea_inode->i_ino, ea_inode->i_nlink);
1013
1014 clear_nlink(ea_inode);
1015 ext4_orphan_add(handle, ea_inode);
1016
1017 if (ea_inode_cache) {
1018 hash = ext4_xattr_inode_get_hash(ea_inode);
1019 mb_cache_entry_delete(ea_inode_cache, hash,
1020 ea_inode->i_ino);
1021 }
1022 }
1023 }
1024
1025 ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
1026 iloc.bh = NULL;
1027 if (ret)
1028 ext4_warning_inode(ea_inode,
1029 "ext4_mark_iloc_dirty() failed ret=%d", ret);
1030out:
1031 brelse(iloc.bh);
1032 inode_unlock(ea_inode);
1033 return ret;
1034}
1035
1036static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode)
1037{
1038 return ext4_xattr_inode_update_ref(handle, ea_inode, 1);
1039}
1040
1041static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode)
1042{
1043 return ext4_xattr_inode_update_ref(handle, ea_inode, -1);
1044}
1045
1046static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
1047 struct ext4_xattr_entry *first)
1048{
1049 struct inode *ea_inode;
1050 struct ext4_xattr_entry *entry;
1051 struct ext4_xattr_entry *failed_entry;
1052 unsigned int ea_ino;
1053 int err, saved_err;
1054
1055 for (entry = first; !IS_LAST_ENTRY(entry);
1056 entry = EXT4_XATTR_NEXT(entry)) {
1057 if (!entry->e_value_inum)
1058 continue;
1059 ea_ino = le32_to_cpu(entry->e_value_inum);
1060 err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
1061 if (err)
1062 goto cleanup;
1063 err = ext4_xattr_inode_inc_ref(handle, ea_inode);
1064 if (err) {
1065 ext4_warning_inode(ea_inode, "inc ref error %d", err);
1066 iput(ea_inode);
1067 goto cleanup;
1068 }
1069 iput(ea_inode);
1070 }
1071 return 0;
1072
1073cleanup:
1074 saved_err = err;
1075 failed_entry = entry;
1076
1077 for (entry = first; entry != failed_entry;
1078 entry = EXT4_XATTR_NEXT(entry)) {
1079 if (!entry->e_value_inum)
1080 continue;
1081 ea_ino = le32_to_cpu(entry->e_value_inum);
1082 err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
1083 if (err) {
1084 ext4_warning(parent->i_sb,
1085 "cleanup ea_ino %u iget error %d", ea_ino,
1086 err);
1087 continue;
1088 }
1089 err = ext4_xattr_inode_dec_ref(handle, ea_inode);
1090 if (err)
1091 ext4_warning_inode(ea_inode, "cleanup dec ref error %d",
1092 err);
1093 iput(ea_inode);
1094 }
1095 return saved_err;
1096}
1097
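ext4_xattr_inode_inc_ref_all() above is careful to be all-or-nothing: if bumping the reference on one EA inode fails partway through, it walks the entries it had already processed and drops those references again before returning the first error. The undo-the-prefix shape, stripped of the inode handling and using plain counters for illustration:

#include <stdio.h>

/* Take a reference on each item; on failure, drop the ones already taken. */
static int inc_ref_all(int *refs, int n, int fail_at)
{
    int i;

    for (i = 0; i < n; i++) {
        if (i == fail_at)
            goto cleanup;          /* simulated failure */
        refs[i]++;
    }
    return 0;

cleanup:
    while (i-- > 0)
        refs[i]--;                 /* undo the partial work */
    return -1;
}

int main(void)
{
    int refs[4] = { 1, 1, 1, 1 };

    if (inc_ref_all(refs, 4, 2))
        printf("failed; refs restored to %d %d %d %d\n",
               refs[0], refs[1], refs[2], refs[3]);
    return 0;
}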
1098static void
1099ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
1100 struct buffer_head *bh,
1101 struct ext4_xattr_entry *first, bool block_csum,
1102 struct ext4_xattr_inode_array **ea_inode_array,
1103 int extra_credits, bool skip_quota)
1104{
1105 struct inode *ea_inode;
1106 struct ext4_xattr_entry *entry;
1107 bool dirty = false;
1108 unsigned int ea_ino;
1109 int err;
1110 int credits;
1111
1112 /* One credit for dec ref on ea_inode, one for orphan list addition, */
1113 credits = 2 + extra_credits;
1114
1115 for (entry = first; !IS_LAST_ENTRY(entry);
1116 entry = EXT4_XATTR_NEXT(entry)) {
1117 if (!entry->e_value_inum)
1118 continue;
1119 ea_ino = le32_to_cpu(entry->e_value_inum);
1120 err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
1121 if (err)
1122 continue;
1123
1124 err = ext4_expand_inode_array(ea_inode_array, ea_inode);
1125 if (err) {
1126 ext4_warning_inode(ea_inode,
1127 "Expand inode array err=%d", err);
1128 iput(ea_inode);
1129 continue;
1130 }
1131
1132 err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
1133 dirty, block_csum);
1134 if (err) {
1135 ext4_warning_inode(ea_inode, "Ensure credits err=%d",
1136 err);
1137 continue;
1138 }
1139
1140 err = ext4_xattr_inode_dec_ref(handle, ea_inode);
1141 if (err) {
1142 ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d",
1143 err);
1144 continue;
1145 }
1146
1147 if (!skip_quota)
1148 ext4_xattr_inode_free_quota(parent,
1149 le32_to_cpu(entry->e_value_size));
1150
1151 /*
1152 * Forget about ea_inode within the same transaction that
1153 * decrements the ref count. This avoids duplicate decrements in
1154 * case the rest of the work spills over to subsequent
1155 * transactions.
1156 */
1157 entry->e_value_inum = 0;
1158 entry->e_value_size = 0;
1159
1160 dirty = true;
1161 }
1162
1163 if (dirty) {
1164 /*
1165 * Note that we are deliberately skipping csum calculation for
1166 * the final update because we do not expect any journal
1167 * restarts until xattr block is freed.
1168 */
1169
1170 err = ext4_handle_dirty_metadata(handle, NULL, bh);
1171 if (err)
1172 ext4_warning_inode(parent,
1173 "handle dirty metadata err=%d", err);
1174 }
1175}
1176
542/* 1177/*
543 * Release the xattr block BH: If the reference count is > 1, decrement it; 1178 * Release the xattr block BH: If the reference count is > 1, decrement it;
544 * otherwise free the block. 1179 * otherwise free the block.
545 */ 1180 */
546static void 1181static void
547ext4_xattr_release_block(handle_t *handle, struct inode *inode, 1182ext4_xattr_release_block(handle_t *handle, struct inode *inode,
548 struct buffer_head *bh) 1183 struct buffer_head *bh,
1184 struct ext4_xattr_inode_array **ea_inode_array,
1185 int extra_credits)
549{ 1186{
550 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 1187 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
551 u32 hash, ref; 1188 u32 hash, ref;
552 int error = 0; 1189 int error = 0;
553 1190
@@ -565,9 +1202,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
565 * This must happen under buffer lock for 1202 * This must happen under buffer lock for
566 * ext4_xattr_block_set() to reliably detect freed block 1203 * ext4_xattr_block_set() to reliably detect freed block
567 */ 1204 */
568 mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr); 1205 if (ea_block_cache)
1206 mb_cache_entry_delete(ea_block_cache, hash,
1207 bh->b_blocknr);
569 get_bh(bh); 1208 get_bh(bh);
570 unlock_buffer(bh); 1209 unlock_buffer(bh);
1210
1211 if (ext4_has_feature_ea_inode(inode->i_sb))
1212 ext4_xattr_inode_dec_ref_all(handle, inode, bh,
1213 BFIRST(bh),
1214 true /* block_csum */,
1215 ea_inode_array,
1216 extra_credits,
1217 true /* skip_quota */);
571 ext4_free_blocks(handle, inode, bh, 0, 1, 1218 ext4_free_blocks(handle, inode, bh, 0, 1,
572 EXT4_FREE_BLOCKS_METADATA | 1219 EXT4_FREE_BLOCKS_METADATA |
573 EXT4_FREE_BLOCKS_FORGET); 1220 EXT4_FREE_BLOCKS_FORGET);
@@ -577,11 +1224,13 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
577 if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) { 1224 if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
578 struct mb_cache_entry *ce; 1225 struct mb_cache_entry *ce;
579 1226
580 ce = mb_cache_entry_get(ext4_mb_cache, hash, 1227 if (ea_block_cache) {
581 bh->b_blocknr); 1228 ce = mb_cache_entry_get(ea_block_cache, hash,
582 if (ce) { 1229 bh->b_blocknr);
583 ce->e_reusable = 1; 1230 if (ce) {
584 mb_cache_entry_put(ext4_mb_cache, ce); 1231 ce->e_reusable = 1;
1232 mb_cache_entry_put(ea_block_cache, ce);
1233 }
585 } 1234 }
586 } 1235 }
587 1236
@@ -620,7 +1269,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
620 size_t *min_offs, void *base, int *total) 1269 size_t *min_offs, void *base, int *total)
621{ 1270{
622 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 1271 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
623 if (last->e_value_size) { 1272 if (!last->e_value_inum && last->e_value_size) {
624 size_t offs = le16_to_cpu(last->e_value_offs); 1273 size_t offs = le16_to_cpu(last->e_value_offs);
625 if (offs < *min_offs) 1274 if (offs < *min_offs)
626 *min_offs = offs; 1275 *min_offs = offs;
@@ -631,113 +1280,454 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
631 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 1280 return (*min_offs - ((void *)last - base) - sizeof(__u32));
632} 1281}
633 1282
634static int 1283/*
635ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) 1284 * Write the value of the EA in an inode.
1285 */
1286static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
1287 const void *buf, int bufsize)
1288{
1289 struct buffer_head *bh = NULL;
1290 unsigned long block = 0;
1291 int blocksize = ea_inode->i_sb->s_blocksize;
1292 int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
1293 int csize, wsize = 0;
1294 int ret = 0;
1295 int retries = 0;
1296
1297retry:
1298 while (ret >= 0 && ret < max_blocks) {
1299 struct ext4_map_blocks map;
1300 map.m_lblk = block += ret;
1301 map.m_len = max_blocks -= ret;
1302
1303 ret = ext4_map_blocks(handle, ea_inode, &map,
1304 EXT4_GET_BLOCKS_CREATE);
1305 if (ret <= 0) {
1306 ext4_mark_inode_dirty(handle, ea_inode);
1307 if (ret == -ENOSPC &&
1308 ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
1309 ret = 0;
1310 goto retry;
1311 }
1312 break;
1313 }
1314 }
1315
1316 if (ret < 0)
1317 return ret;
1318
1319 block = 0;
1320 while (wsize < bufsize) {
1321 if (bh != NULL)
1322 brelse(bh);
1323 csize = (bufsize - wsize) > blocksize ? blocksize :
1324 bufsize - wsize;
1325 bh = ext4_getblk(handle, ea_inode, block, 0);
1326 if (IS_ERR(bh))
1327 return PTR_ERR(bh);
1328 ret = ext4_journal_get_write_access(handle, bh);
1329 if (ret)
1330 goto out;
1331
1332 memcpy(bh->b_data, buf, csize);
1333 set_buffer_uptodate(bh);
1334 ext4_handle_dirty_metadata(handle, ea_inode, bh);
1335
1336 buf += csize;
1337 wsize += csize;
1338 block += 1;
1339 }
1340
1341 inode_lock(ea_inode);
1342 i_size_write(ea_inode, wsize);
1343 ext4_update_i_disksize(ea_inode, wsize);
1344 inode_unlock(ea_inode);
1345
1346 ext4_mark_inode_dirty(handle, ea_inode);
1347
1348out:
1349 brelse(bh);
1350
1351 return ret;
1352}
1353
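The second loop of ext4_xattr_inode_write() is a plain chunked copy: the blocks were allocated by the first loop, so the value is copied one filesystem block at a time and i_size is finally set to the number of bytes written. Below is a compilable userspace sketch of that copy step; write_block() and the other names are invented stand-ins for the ext4_getblk()/journal calls.

#include <stddef.h>
#include <stdio.h>

static int copy_in_chunks(const char *buf, size_t bufsize, size_t blocksize,
			  int (*write_block)(unsigned long block,
					     const char *data, size_t len))
{
	unsigned long block = 0;
	size_t wsize = 0;

	while (wsize < bufsize) {
		/* The last chunk may be shorter than a full block. */
		size_t csize = bufsize - wsize > blocksize ?
			       blocksize : bufsize - wsize;
		int err = write_block(block, buf + wsize, csize);

		if (err)
			return err;
		wsize += csize;
		block++;
	}
	return 0;	/* the caller would now set i_size to wsize */
}

static int print_block(unsigned long block, const char *data, size_t len)
{
	(void)data;
	printf("block %lu: %zu bytes\n", block, len);
	return 0;
}

int main(void)
{
	static char value[10000];	/* pretend this is the EA value buffer */

	return copy_in_chunks(value, sizeof(value), 4096, print_block);
}
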
1354/*
1355 * Create an inode to store the value of a large EA.
1356 */
1357static struct inode *ext4_xattr_inode_create(handle_t *handle,
1358 struct inode *inode, u32 hash)
1359{
1360 struct inode *ea_inode = NULL;
1361 uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
1362 int err;
1363
1364 /*
1365	 * Let the next inode be the goal, so we try to allocate the EA inode
1366	 * in the same group, or a nearby one.
1367 */
1368 ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
1369 S_IFREG | 0600, NULL, inode->i_ino + 1, owner,
1370 EXT4_EA_INODE_FL);
1371 if (!IS_ERR(ea_inode)) {
1372 ea_inode->i_op = &ext4_file_inode_operations;
1373 ea_inode->i_fop = &ext4_file_operations;
1374 ext4_set_aops(ea_inode);
1375 ext4_xattr_inode_set_class(ea_inode);
1376 unlock_new_inode(ea_inode);
1377 ext4_xattr_inode_set_ref(ea_inode, 1);
1378 ext4_xattr_inode_set_hash(ea_inode, hash);
1379 err = ext4_mark_inode_dirty(handle, ea_inode);
1380 if (!err)
1381 err = ext4_inode_attach_jinode(ea_inode);
1382 if (err) {
1383 iput(ea_inode);
1384 return ERR_PTR(err);
1385 }
1386
1387 /*
1388	 * Xattr inodes are shared, so quota charging is performed
1389 * at a higher level.
1390 */
1391 dquot_free_inode(ea_inode);
1392 dquot_drop(ea_inode);
1393 inode_lock(ea_inode);
1394 ea_inode->i_flags |= S_NOQUOTA;
1395 inode_unlock(ea_inode);
1396 }
1397
1398 return ea_inode;
1399}
1400
1401static struct inode *
1402ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
1403 size_t value_len, u32 hash)
1404{
1405 struct inode *ea_inode;
1406 struct mb_cache_entry *ce;
1407 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
1408 void *ea_data;
1409
1410 if (!ea_inode_cache)
1411 return NULL;
1412
1413 ce = mb_cache_entry_find_first(ea_inode_cache, hash);
1414 if (!ce)
1415 return NULL;
1416
1417 ea_data = ext4_kvmalloc(value_len, GFP_NOFS);
1418 if (!ea_data) {
1419 mb_cache_entry_put(ea_inode_cache, ce);
1420 return NULL;
1421 }
1422
1423 while (ce) {
1424 ea_inode = ext4_iget(inode->i_sb, ce->e_value);
1425 if (!IS_ERR(ea_inode) &&
1426 !is_bad_inode(ea_inode) &&
1427 (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
1428 i_size_read(ea_inode) == value_len &&
1429 !ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
1430 !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
1431 value_len) &&
1432 !memcmp(value, ea_data, value_len)) {
1433 mb_cache_entry_touch(ea_inode_cache, ce);
1434 mb_cache_entry_put(ea_inode_cache, ce);
1435 kvfree(ea_data);
1436 return ea_inode;
1437 }
1438
1439 if (!IS_ERR(ea_inode))
1440 iput(ea_inode);
1441 ce = mb_cache_entry_find_next(ea_inode_cache, ce);
1442 }
1443 kvfree(ea_data);
1444 return NULL;
1445}
1446
1447/*
1448 * Add the value of the EA to an inode, reusing an existing EA inode if the value matches.
1449 */
1450static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
1451 const void *value, size_t value_len,
1452 struct inode **ret_inode)
1453{
1454 struct inode *ea_inode;
1455 u32 hash;
1456 int err;
1457
1458 hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
1459 ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
1460 if (ea_inode) {
1461 err = ext4_xattr_inode_inc_ref(handle, ea_inode);
1462 if (err) {
1463 iput(ea_inode);
1464 return err;
1465 }
1466
1467 *ret_inode = ea_inode;
1468 return 0;
1469 }
1470
1471 /* Create an inode for the EA value */
1472 ea_inode = ext4_xattr_inode_create(handle, inode, hash);
1473 if (IS_ERR(ea_inode))
1474 return PTR_ERR(ea_inode);
1475
1476 err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
1477 if (err) {
1478 ext4_xattr_inode_dec_ref(handle, ea_inode);
1479 iput(ea_inode);
1480 return err;
1481 }
1482
1483 if (EA_INODE_CACHE(inode))
1484 mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
1485 ea_inode->i_ino, true /* reusable */);
1486
1487 *ret_inode = ea_inode;
1488 return 0;
1489}
1490
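ext4_xattr_inode_lookup_create() is where deduplication happens: the value is hashed, the EA-inode cache is searched for an inode already holding identical bytes (a full compare is required because different values can share a hash), and only on a miss is a new EA inode created and inserted into the cache. The toy model below shows the same lookup-or-create flow with a plain linked list standing in for mbcache; all names are made up for illustration.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct ea_value {
	struct ea_value *next;
	uint32_t hash;
	unsigned int refcount;
	size_t len;
	unsigned char data[];
};

static struct ea_value *cache;	/* stand-in for the mbcache keyed by hash */

static struct ea_value *lookup_or_create(const void *value, size_t len,
					 uint32_t hash)
{
	struct ea_value *v;

	for (v = cache; v; v = v->next)
		if (v->hash == hash && v->len == len &&
		    !memcmp(v->data, value, len)) {
			v->refcount++;	/* like ext4_xattr_inode_inc_ref() */
			return v;
		}

	v = malloc(sizeof(*v) + len);	/* miss: like ext4_xattr_inode_create() */
	if (!v)
		return NULL;
	v->next = cache;
	v->hash = hash;
	v->refcount = 1;
	v->len = len;
	memcpy(v->data, value, len);
	cache = v;			/* like mb_cache_entry_create() */
	return v;
}

int main(void)
{
	struct ea_value *a = lookup_or_create("hello", 5, 0x1234);
	struct ea_value *b = lookup_or_create("hello", 5, 0x1234);

	return !(a && a == b && a->refcount == 2);	/* second call deduplicates */
}
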
1491/*
1492 * Reserve min(block_size/8, 1024) bytes for xattr entries/names if the ea_inode
1493 * feature is enabled.
1494 */
1495#define EXT4_XATTR_BLOCK_RESERVE(inode) min(i_blocksize(inode)/8, 1024U)
1496
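For concreteness, the reservation works out to blocksize/8 until it caps at 1024 bytes (the cap is reached at 8 KiB blocks), i.e. 128 bytes on a 1k-block filesystem and 512 bytes with 4k blocks. A quick standalone check:

#include <stdio.h>

int main(void)
{
	unsigned int sizes[] = { 1024, 2048, 4096, 8192, 65536 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned int bs = sizes[i];
		unsigned int reserve = bs / 8 < 1024 ? bs / 8 : 1024;

		printf("block size %6u -> reserve %4u bytes for entries/names\n",
		       bs, reserve);
	}
	return 0;
}
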
1497static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
1498 struct ext4_xattr_search *s,
1499 handle_t *handle, struct inode *inode,
1500 bool is_block)
636{ 1501{
637 struct ext4_xattr_entry *last; 1502 struct ext4_xattr_entry *last;
638 size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); 1503 struct ext4_xattr_entry *here = s->here;
1504 size_t min_offs = s->end - s->base, name_len = strlen(i->name);
1505 int in_inode = i->in_inode;
1506 struct inode *old_ea_inode = NULL;
1507 struct inode *new_ea_inode = NULL;
1508 size_t old_size, new_size;
1509 int ret;
1510
1511 /* Space used by old and new values. */
1512 old_size = (!s->not_found && !here->e_value_inum) ?
1513 EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0;
1514 new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0;
1515
1516 /*
1517 * Optimization for the simple case when old and new values have the
1518 * same padded sizes. Not applicable if external inodes are involved.
1519 */
1520 if (new_size && new_size == old_size) {
1521 size_t offs = le16_to_cpu(here->e_value_offs);
1522 void *val = s->base + offs;
1523
1524 here->e_value_size = cpu_to_le32(i->value_len);
1525 if (i->value == EXT4_ZERO_XATTR_VALUE) {
1526 memset(val, 0, new_size);
1527 } else {
1528 memcpy(val, i->value, i->value_len);
1529 /* Clear padding bytes. */
1530 memset(val + i->value_len, 0, new_size - i->value_len);
1531 }
1532 return 0;
1533 }
639 1534
640 /* Compute min_offs and last. */ 1535 /* Compute min_offs and last. */
641 last = s->first; 1536 last = s->first;
642 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 1537 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
643 if (last->e_value_size) { 1538 if (!last->e_value_inum && last->e_value_size) {
644 size_t offs = le16_to_cpu(last->e_value_offs); 1539 size_t offs = le16_to_cpu(last->e_value_offs);
645 if (offs < min_offs) 1540 if (offs < min_offs)
646 min_offs = offs; 1541 min_offs = offs;
647 } 1542 }
648 } 1543 }
649 free = min_offs - ((void *)last - s->base) - sizeof(__u32); 1544
650 if (!s->not_found) { 1545 /* Check whether we have enough space. */
651 if (s->here->e_value_size) {
652 size_t size = le32_to_cpu(s->here->e_value_size);
653 free += EXT4_XATTR_SIZE(size);
654 }
655 free += EXT4_XATTR_LEN(name_len);
656 }
657 if (i->value) { 1546 if (i->value) {
658 if (free < EXT4_XATTR_LEN(name_len) + 1547 size_t free;
659 EXT4_XATTR_SIZE(i->value_len)) 1548
660 return -ENOSPC; 1549 free = min_offs - ((void *)last - s->base) - sizeof(__u32);
1550 if (!s->not_found)
1551 free += EXT4_XATTR_LEN(name_len) + old_size;
1552
1553 if (free < EXT4_XATTR_LEN(name_len) + new_size) {
1554 ret = -ENOSPC;
1555 goto out;
1556 }
1557
1558 /*
1559 * If storing the value in an external inode is an option,
1560 * reserve space for xattr entries/names in the external
1561 * attribute block so that a long value does not occupy the
1562	 * whole space and prevent further entries from being added.
1563 */
1564 if (ext4_has_feature_ea_inode(inode->i_sb) &&
1565 new_size && is_block &&
1566 (min_offs + old_size - new_size) <
1567 EXT4_XATTR_BLOCK_RESERVE(inode)) {
1568 ret = -ENOSPC;
1569 goto out;
1570 }
661 } 1571 }
662 1572
663 if (i->value && s->not_found) { 1573 /*
664 /* Insert the new name. */ 1574 * Getting access to old and new ea inodes is subject to failures.
665 size_t size = EXT4_XATTR_LEN(name_len); 1575 * Finish that work before doing any modifications to the xattr data.
666 size_t rest = (void *)last - (void *)s->here + sizeof(__u32); 1576 */
667 memmove((void *)s->here + size, s->here, rest); 1577 if (!s->not_found && here->e_value_inum) {
668 memset(s->here, 0, size); 1578 ret = ext4_xattr_inode_iget(inode,
669 s->here->e_name_index = i->name_index; 1579 le32_to_cpu(here->e_value_inum),
670 s->here->e_name_len = name_len; 1580 &old_ea_inode);
671 memcpy(s->here->e_name, i->name, name_len); 1581 if (ret) {
672 } else { 1582 old_ea_inode = NULL;
673 if (s->here->e_value_size) { 1583 goto out;
674 void *first_val = s->base + min_offs; 1584 }
675 size_t offs = le16_to_cpu(s->here->e_value_offs); 1585 }
676 void *val = s->base + offs; 1586 if (i->value && in_inode) {
677 size_t size = EXT4_XATTR_SIZE( 1587 WARN_ON_ONCE(!i->value_len);
678 le32_to_cpu(s->here->e_value_size)); 1588
679 1589 ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
680 if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) { 1590 if (ret)
681 /* The old and the new value have the same 1591 goto out;
682 size. Just replace. */ 1592
683 s->here->e_value_size = 1593 ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
684 cpu_to_le32(i->value_len); 1594 i->value_len,
685 if (i->value == EXT4_ZERO_XATTR_VALUE) { 1595 &new_ea_inode);
686 memset(val, 0, size); 1596 if (ret) {
687 } else { 1597 new_ea_inode = NULL;
688 /* Clear pad bytes first. */ 1598 ext4_xattr_inode_free_quota(inode, i->value_len);
689 memset(val + size - EXT4_XATTR_PAD, 0, 1599 goto out;
690 EXT4_XATTR_PAD); 1600 }
691 memcpy(val, i->value, i->value_len); 1601 }
692 }
693 return 0;
694 }
695 1602
696 /* Remove the old value. */ 1603 if (old_ea_inode) {
697 memmove(first_val + size, first_val, val - first_val); 1604 /* We are ready to release ref count on the old_ea_inode. */
698 memset(first_val, 0, size); 1605 ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
699 s->here->e_value_size = 0; 1606 if (ret) {
700 s->here->e_value_offs = 0; 1607 /* Release newly required ref count on new_ea_inode. */
701 min_offs += size; 1608 if (new_ea_inode) {
702 1609 int err;
703 /* Adjust all value offsets. */ 1610
704 last = s->first; 1611 err = ext4_xattr_inode_dec_ref(handle,
705 while (!IS_LAST_ENTRY(last)) { 1612 new_ea_inode);
706 size_t o = le16_to_cpu(last->e_value_offs); 1613 if (err)
707 if (last->e_value_size && o < offs) 1614 ext4_warning_inode(new_ea_inode,
708 last->e_value_offs = 1615 "dec ref new_ea_inode err=%d",
709 cpu_to_le16(o + size); 1616 err);
710 last = EXT4_XATTR_NEXT(last); 1617 ext4_xattr_inode_free_quota(inode,
1618 i->value_len);
711 } 1619 }
1620 goto out;
712 } 1621 }
713 if (!i->value) { 1622
714 /* Remove the old name. */ 1623 ext4_xattr_inode_free_quota(inode,
715 size_t size = EXT4_XATTR_LEN(name_len); 1624 le32_to_cpu(here->e_value_size));
716 last = ENTRY((void *)last - size); 1625 }
717 memmove(s->here, (void *)s->here + size, 1626
718 (void *)last - (void *)s->here + sizeof(__u32)); 1627 /* No failures allowed past this point. */
719 memset(last, 0, size); 1628
1629 if (!s->not_found && here->e_value_offs) {
1630 /* Remove the old value. */
1631 void *first_val = s->base + min_offs;
1632 size_t offs = le16_to_cpu(here->e_value_offs);
1633 void *val = s->base + offs;
1634
1635 memmove(first_val + old_size, first_val, val - first_val);
1636 memset(first_val, 0, old_size);
1637 min_offs += old_size;
1638
1639 /* Adjust all value offsets. */
1640 last = s->first;
1641 while (!IS_LAST_ENTRY(last)) {
1642 size_t o = le16_to_cpu(last->e_value_offs);
1643
1644 if (!last->e_value_inum &&
1645 last->e_value_size && o < offs)
1646 last->e_value_offs = cpu_to_le16(o + old_size);
1647 last = EXT4_XATTR_NEXT(last);
720 } 1648 }
721 } 1649 }
722 1650
1651 if (!i->value) {
1652 /* Remove old name. */
1653 size_t size = EXT4_XATTR_LEN(name_len);
1654
1655 last = ENTRY((void *)last - size);
1656 memmove(here, (void *)here + size,
1657 (void *)last - (void *)here + sizeof(__u32));
1658 memset(last, 0, size);
1659 } else if (s->not_found) {
1660 /* Insert new name. */
1661 size_t size = EXT4_XATTR_LEN(name_len);
1662 size_t rest = (void *)last - (void *)here + sizeof(__u32);
1663
1664 memmove((void *)here + size, here, rest);
1665 memset(here, 0, size);
1666 here->e_name_index = i->name_index;
1667 here->e_name_len = name_len;
1668 memcpy(here->e_name, i->name, name_len);
1669 } else {
1670 /* This is an update, reset value info. */
1671 here->e_value_inum = 0;
1672 here->e_value_offs = 0;
1673 here->e_value_size = 0;
1674 }
1675
723 if (i->value) { 1676 if (i->value) {
724 /* Insert the new value. */ 1677 /* Insert new value. */
725 s->here->e_value_size = cpu_to_le32(i->value_len); 1678 if (in_inode) {
726 if (i->value_len) { 1679 here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino);
727 size_t size = EXT4_XATTR_SIZE(i->value_len); 1680 } else if (i->value_len) {
728 void *val = s->base + min_offs - size; 1681 void *val = s->base + min_offs - new_size;
729 s->here->e_value_offs = cpu_to_le16(min_offs - size); 1682
1683 here->e_value_offs = cpu_to_le16(min_offs - new_size);
730 if (i->value == EXT4_ZERO_XATTR_VALUE) { 1684 if (i->value == EXT4_ZERO_XATTR_VALUE) {
731 memset(val, 0, size); 1685 memset(val, 0, new_size);
732 } else { 1686 } else {
733 /* Clear the pad bytes first. */
734 memset(val + size - EXT4_XATTR_PAD, 0,
735 EXT4_XATTR_PAD);
736 memcpy(val, i->value, i->value_len); 1687 memcpy(val, i->value, i->value_len);
1688 /* Clear padding bytes. */
1689 memset(val + i->value_len, 0,
1690 new_size - i->value_len);
737 } 1691 }
738 } 1692 }
1693 here->e_value_size = cpu_to_le32(i->value_len);
739 } 1694 }
740 return 0; 1695
1696 if (i->value) {
1697 __le32 hash = 0;
1698
1699 /* Entry hash calculation. */
1700 if (in_inode) {
1701 __le32 crc32c_hash;
1702
1703 /*
1704 * Feed crc32c hash instead of the raw value for entry
1705 * hash calculation. This is to avoid walking
1706 * potentially long value buffer again.
1707 */
1708 crc32c_hash = cpu_to_le32(
1709 ext4_xattr_inode_get_hash(new_ea_inode));
1710 hash = ext4_xattr_hash_entry(here->e_name,
1711 here->e_name_len,
1712 &crc32c_hash, 1);
1713 } else if (is_block) {
1714 __le32 *value = s->base + min_offs - new_size;
1715
1716 hash = ext4_xattr_hash_entry(here->e_name,
1717 here->e_name_len, value,
1718 new_size >> 2);
1719 }
1720 here->e_hash = hash;
1721 }
1722
1723 if (is_block)
1724 ext4_xattr_rehash((struct ext4_xattr_header *)s->base);
1725
1726 ret = 0;
1727out:
1728 iput(old_ea_inode);
1729 iput(new_ea_inode);
1730 return ret;
741} 1731}
742 1732
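The "remove the old value" branch of ext4_xattr_set_entry() is a packed-region compaction: values are stored at the tail of the block growing toward lower offsets, so freeing one slides everything stored below it upward by old_size, and every entry whose value offset was smaller than the removed one is bumped by the same amount. A tiny self-contained demonstration with made-up offsets:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/*
	 * Toy value region: "CCCC", "BBBB", "AAAA" packed at offsets 12, 16
	 * and 20 of a 24-byte block, so min_offs is 12.  Remove the value at
	 * offset 16 ("BBBB"), whose size is 4 bytes.
	 */
	char blk[24];
	size_t off_a = 20, off_b = 16, off_c = 12;
	size_t min_offs = 12, size = 4;

	memset(blk, 0, sizeof(blk));
	memcpy(blk + off_a, "AAAA", 4);
	memcpy(blk + off_b, "BBBB", 4);
	memcpy(blk + off_c, "CCCC", 4);

	/* Slide everything below the removed value up by its size ... */
	memmove(blk + min_offs + size, blk + min_offs, off_b - min_offs);
	memset(blk + min_offs, 0, size);
	min_offs += size;

	/* ... and bump the offsets of entries that pointed below it. */
	if (off_c < off_b)
		off_c += size;

	printf("\"CCCC\" moved to offset %zu (%.4s); values now start at %zu\n",
	       off_c, blk + off_c, min_offs);
	return 0;
}
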
743struct ext4_xattr_block_find { 1733struct ext4_xattr_block_find {
@@ -794,15 +1784,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
794{ 1784{
795 struct super_block *sb = inode->i_sb; 1785 struct super_block *sb = inode->i_sb;
796 struct buffer_head *new_bh = NULL; 1786 struct buffer_head *new_bh = NULL;
797 struct ext4_xattr_search *s = &bs->s; 1787 struct ext4_xattr_search s_copy = bs->s;
1788 struct ext4_xattr_search *s = &s_copy;
798 struct mb_cache_entry *ce = NULL; 1789 struct mb_cache_entry *ce = NULL;
799 int error = 0; 1790 int error = 0;
800 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 1791 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
1792 struct inode *ea_inode = NULL;
1793 size_t old_ea_inode_size = 0;
801 1794
802#define header(x) ((struct ext4_xattr_header *)(x)) 1795#define header(x) ((struct ext4_xattr_header *)(x))
803 1796
804 if (i->value && i->value_len > sb->s_blocksize)
805 return -ENOSPC;
806 if (s->base) { 1797 if (s->base) {
807 BUFFER_TRACE(bs->bh, "get_write_access"); 1798 BUFFER_TRACE(bs->bh, "get_write_access");
808 error = ext4_journal_get_write_access(handle, bs->bh); 1799 error = ext4_journal_get_write_access(handle, bs->bh);
@@ -818,17 +1809,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
818 * ext4_xattr_block_set() to reliably detect modified 1809 * ext4_xattr_block_set() to reliably detect modified
819 * block 1810 * block
820 */ 1811 */
821 mb_cache_entry_delete_block(ext4_mb_cache, hash, 1812 if (ea_block_cache)
822 bs->bh->b_blocknr); 1813 mb_cache_entry_delete(ea_block_cache, hash,
1814 bs->bh->b_blocknr);
823 ea_bdebug(bs->bh, "modifying in-place"); 1815 ea_bdebug(bs->bh, "modifying in-place");
824 error = ext4_xattr_set_entry(i, s); 1816 error = ext4_xattr_set_entry(i, s, handle, inode,
825 if (!error) { 1817 true /* is_block */);
826 if (!IS_LAST_ENTRY(s->first)) 1818 if (!error)
827 ext4_xattr_rehash(header(s->base), 1819 ext4_xattr_block_cache_insert(ea_block_cache,
828 s->here); 1820 bs->bh);
829 ext4_xattr_cache_insert(ext4_mb_cache,
830 bs->bh);
831 }
832 ext4_xattr_block_csum_set(inode, bs->bh); 1821 ext4_xattr_block_csum_set(inode, bs->bh);
833 unlock_buffer(bs->bh); 1822 unlock_buffer(bs->bh);
834 if (error == -EFSCORRUPTED) 1823 if (error == -EFSCORRUPTED)
@@ -854,6 +1843,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
854 header(s->base)->h_refcount = cpu_to_le32(1); 1843 header(s->base)->h_refcount = cpu_to_le32(1);
855 s->here = ENTRY(s->base + offset); 1844 s->here = ENTRY(s->base + offset);
856 s->end = s->base + bs->bh->b_size; 1845 s->end = s->base + bs->bh->b_size;
1846
1847 /*
1848 * If existing entry points to an xattr inode, we need
1849 * to prevent ext4_xattr_set_entry() from decrementing
1850 * ref count on it because the reference belongs to the
1851 * original block. In this case, make the entry look
1852 * like it has an empty value.
1853 */
1854 if (!s->not_found && s->here->e_value_inum) {
1855 /*
1856 * Defer quota free call for previous inode
1857 * until success is guaranteed.
1858 */
1859 old_ea_inode_size = le32_to_cpu(
1860 s->here->e_value_size);
1861 s->here->e_value_inum = 0;
1862 s->here->e_value_size = 0;
1863 }
857 } 1864 }
858 } else { 1865 } else {
859 /* Allocate a buffer where we construct the new block. */ 1866 /* Allocate a buffer where we construct the new block. */
@@ -870,17 +1877,33 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
870 s->end = s->base + sb->s_blocksize; 1877 s->end = s->base + sb->s_blocksize;
871 } 1878 }
872 1879
873 error = ext4_xattr_set_entry(i, s); 1880 error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
874 if (error == -EFSCORRUPTED) 1881 if (error == -EFSCORRUPTED)
875 goto bad_block; 1882 goto bad_block;
876 if (error) 1883 if (error)
877 goto cleanup; 1884 goto cleanup;
878 if (!IS_LAST_ENTRY(s->first)) 1885
879 ext4_xattr_rehash(header(s->base), s->here); 1886 if (i->value && s->here->e_value_inum) {
1887 unsigned int ea_ino;
1888
1889 /*
1890 * A ref count on ea_inode has been taken as part of the call to
1891 * ext4_xattr_set_entry() above. We would like to drop this
1892 * extra ref but we have to wait until the xattr block is
1893 * initialized and has its own ref count on the ea_inode.
1894 */
1895 ea_ino = le32_to_cpu(s->here->e_value_inum);
1896 error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
1897 if (error) {
1898 ea_inode = NULL;
1899 goto cleanup;
1900 }
1901 }
880 1902
881inserted: 1903inserted:
882 if (!IS_LAST_ENTRY(s->first)) { 1904 if (!IS_LAST_ENTRY(s->first)) {
883 new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce); 1905 new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
1906 &ce);
884 if (new_bh) { 1907 if (new_bh) {
885 /* We found an identical block in the cache. */ 1908 /* We found an identical block in the cache. */
886 if (new_bh == bs->bh) 1909 if (new_bh == bs->bh)
@@ -925,7 +1948,7 @@ inserted:
925 EXT4_C2B(EXT4_SB(sb), 1948 EXT4_C2B(EXT4_SB(sb),
926 1)); 1949 1));
927 brelse(new_bh); 1950 brelse(new_bh);
928 mb_cache_entry_put(ext4_mb_cache, ce); 1951 mb_cache_entry_put(ea_block_cache, ce);
929 ce = NULL; 1952 ce = NULL;
930 new_bh = NULL; 1953 new_bh = NULL;
931 goto inserted; 1954 goto inserted;
@@ -944,8 +1967,8 @@ inserted:
944 if (error) 1967 if (error)
945 goto cleanup_dquot; 1968 goto cleanup_dquot;
946 } 1969 }
947 mb_cache_entry_touch(ext4_mb_cache, ce); 1970 mb_cache_entry_touch(ea_block_cache, ce);
948 mb_cache_entry_put(ext4_mb_cache, ce); 1971 mb_cache_entry_put(ea_block_cache, ce);
949 ce = NULL; 1972 ce = NULL;
950 } else if (bs->bh && s->base == bs->bh->b_data) { 1973 } else if (bs->bh && s->base == bs->bh->b_data) {
951 /* We were modifying this block in-place. */ 1974 /* We were modifying this block in-place. */
@@ -984,6 +2007,22 @@ getblk_failed:
984 EXT4_FREE_BLOCKS_METADATA); 2007 EXT4_FREE_BLOCKS_METADATA);
985 goto cleanup; 2008 goto cleanup;
986 } 2009 }
2010 error = ext4_xattr_inode_inc_ref_all(handle, inode,
2011 ENTRY(header(s->base)+1));
2012 if (error)
2013 goto getblk_failed;
2014 if (ea_inode) {
2015 /* Drop the extra ref on ea_inode. */
2016 error = ext4_xattr_inode_dec_ref(handle,
2017 ea_inode);
2018 if (error)
2019 ext4_warning_inode(ea_inode,
2020 "dec ref error=%d",
2021 error);
2022 iput(ea_inode);
2023 ea_inode = NULL;
2024 }
2025
987 lock_buffer(new_bh); 2026 lock_buffer(new_bh);
988 error = ext4_journal_get_create_access(handle, new_bh); 2027 error = ext4_journal_get_create_access(handle, new_bh);
989 if (error) { 2028 if (error) {
@@ -995,7 +2034,7 @@ getblk_failed:
995 ext4_xattr_block_csum_set(inode, new_bh); 2034 ext4_xattr_block_csum_set(inode, new_bh);
996 set_buffer_uptodate(new_bh); 2035 set_buffer_uptodate(new_bh);
997 unlock_buffer(new_bh); 2036 unlock_buffer(new_bh);
998 ext4_xattr_cache_insert(ext4_mb_cache, new_bh); 2037 ext4_xattr_block_cache_insert(ea_block_cache, new_bh);
999 error = ext4_handle_dirty_metadata(handle, inode, 2038 error = ext4_handle_dirty_metadata(handle, inode,
1000 new_bh); 2039 new_bh);
1001 if (error) 2040 if (error)
@@ -1003,17 +2042,40 @@ getblk_failed:
1003 } 2042 }
1004 } 2043 }
1005 2044
2045 if (old_ea_inode_size)
2046 ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
2047
1006 /* Update the inode. */ 2048 /* Update the inode. */
1007 EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; 2049 EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
1008 2050
1009 /* Drop the previous xattr block. */ 2051 /* Drop the previous xattr block. */
1010 if (bs->bh && bs->bh != new_bh) 2052 if (bs->bh && bs->bh != new_bh) {
1011 ext4_xattr_release_block(handle, inode, bs->bh); 2053 struct ext4_xattr_inode_array *ea_inode_array = NULL;
2054
2055 ext4_xattr_release_block(handle, inode, bs->bh,
2056 &ea_inode_array,
2057 0 /* extra_credits */);
2058 ext4_xattr_inode_array_free(ea_inode_array);
2059 }
1012 error = 0; 2060 error = 0;
1013 2061
1014cleanup: 2062cleanup:
2063 if (ea_inode) {
2064 int error2;
2065
2066 error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
2067 if (error2)
2068 ext4_warning_inode(ea_inode, "dec ref error=%d",
2069 error2);
2070
2071 /* If there was an error, revert the quota charge. */
2072 if (error)
2073 ext4_xattr_inode_free_quota(inode,
2074 i_size_read(ea_inode));
2075 iput(ea_inode);
2076 }
1015 if (ce) 2077 if (ce)
1016 mb_cache_entry_put(ext4_mb_cache, ce); 2078 mb_cache_entry_put(ea_block_cache, ce);
1017 brelse(new_bh); 2079 brelse(new_bh);
1018 if (!(bs->bh && s->base == bs->bh->b_data)) 2080 if (!(bs->bh && s->base == bs->bh->b_data))
1019 kfree(s->base); 2081 kfree(s->base);
@@ -1070,7 +2132,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
1070 2132
1071 if (EXT4_I(inode)->i_extra_isize == 0) 2133 if (EXT4_I(inode)->i_extra_isize == 0)
1072 return -ENOSPC; 2134 return -ENOSPC;
1073 error = ext4_xattr_set_entry(i, s); 2135 error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
1074 if (error) { 2136 if (error) {
1075 if (error == -ENOSPC && 2137 if (error == -ENOSPC &&
1076 ext4_has_inline_data(inode)) { 2138 ext4_has_inline_data(inode)) {
@@ -1082,7 +2144,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
1082 error = ext4_xattr_ibody_find(inode, i, is); 2144 error = ext4_xattr_ibody_find(inode, i, is);
1083 if (error) 2145 if (error)
1084 return error; 2146 return error;
1085 error = ext4_xattr_set_entry(i, s); 2147 error = ext4_xattr_set_entry(i, s, handle, inode,
2148 false /* is_block */);
1086 } 2149 }
1087 if (error) 2150 if (error)
1088 return error; 2151 return error;
@@ -1098,7 +2161,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
1098 return 0; 2161 return 0;
1099} 2162}
1100 2163
1101static int ext4_xattr_ibody_set(struct inode *inode, 2164static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
1102 struct ext4_xattr_info *i, 2165 struct ext4_xattr_info *i,
1103 struct ext4_xattr_ibody_find *is) 2166 struct ext4_xattr_ibody_find *is)
1104{ 2167{
@@ -1108,7 +2171,7 @@ static int ext4_xattr_ibody_set(struct inode *inode,
1108 2171
1109 if (EXT4_I(inode)->i_extra_isize == 0) 2172 if (EXT4_I(inode)->i_extra_isize == 0)
1110 return -ENOSPC; 2173 return -ENOSPC;
1111 error = ext4_xattr_set_entry(i, s); 2174 error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
1112 if (error) 2175 if (error)
1113 return error; 2176 return error;
1114 header = IHDR(inode, ext4_raw_inode(&is->iloc)); 2177 header = IHDR(inode, ext4_raw_inode(&is->iloc));
@@ -1127,12 +2190,31 @@ static int ext4_xattr_value_same(struct ext4_xattr_search *s,
1127{ 2190{
1128 void *value; 2191 void *value;
1129 2192
2193 /* When e_value_inum is set the value is stored externally. */
2194 if (s->here->e_value_inum)
2195 return 0;
1130 if (le32_to_cpu(s->here->e_value_size) != i->value_len) 2196 if (le32_to_cpu(s->here->e_value_size) != i->value_len)
1131 return 0; 2197 return 0;
1132 value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs); 2198 value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
1133 return !memcmp(value, i->value, i->value_len); 2199 return !memcmp(value, i->value, i->value_len);
1134} 2200}
1135 2201
2202static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
2203{
2204 struct buffer_head *bh;
2205 int error;
2206
2207 if (!EXT4_I(inode)->i_file_acl)
2208 return NULL;
2209 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
2210 if (!bh)
2211 return ERR_PTR(-EIO);
2212 error = ext4_xattr_check_block(inode, bh);
2213 if (error)
2214 return ERR_PTR(error);
2215 return bh;
2216}
2217
1136/* 2218/*
1137 * ext4_xattr_set_handle() 2219 * ext4_xattr_set_handle()
1138 * 2220 *
@@ -1155,7 +2237,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1155 .name = name, 2237 .name = name,
1156 .value = value, 2238 .value = value,
1157 .value_len = value_len, 2239 .value_len = value_len,
1158 2240 .in_inode = 0,
1159 }; 2241 };
1160 struct ext4_xattr_ibody_find is = { 2242 struct ext4_xattr_ibody_find is = {
1161 .s = { .not_found = -ENODATA, }, 2243 .s = { .not_found = -ENODATA, },
@@ -1173,6 +2255,28 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1173 2255
1174 ext4_write_lock_xattr(inode, &no_expand); 2256 ext4_write_lock_xattr(inode, &no_expand);
1175 2257
2258 /* Check journal credits under write lock. */
2259 if (ext4_handle_valid(handle)) {
2260 struct buffer_head *bh;
2261 int credits;
2262
2263 bh = ext4_xattr_get_block(inode);
2264 if (IS_ERR(bh)) {
2265 error = PTR_ERR(bh);
2266 goto cleanup;
2267 }
2268
2269 credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
2270 value_len,
2271 flags & XATTR_CREATE);
2272 brelse(bh);
2273
2274 if (!ext4_handle_has_enough_credits(handle, credits)) {
2275 error = -ENOSPC;
2276 goto cleanup;
2277 }
2278 }
2279
1176 error = ext4_reserve_inode_write(handle, inode, &is.iloc); 2280 error = ext4_reserve_inode_write(handle, inode, &is.iloc);
1177 if (error) 2281 if (error)
1178 goto cleanup; 2282 goto cleanup;
@@ -1202,9 +2306,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1202 if (flags & XATTR_CREATE) 2306 if (flags & XATTR_CREATE)
1203 goto cleanup; 2307 goto cleanup;
1204 } 2308 }
2309
1205 if (!value) { 2310 if (!value) {
1206 if (!is.s.not_found) 2311 if (!is.s.not_found)
1207 error = ext4_xattr_ibody_set(inode, &i, &is); 2312 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1208 else if (!bs.s.not_found) 2313 else if (!bs.s.not_found)
1209 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2314 error = ext4_xattr_block_set(handle, inode, &i, &bs);
1210 } else { 2315 } else {
@@ -1215,7 +2320,12 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1215 if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i)) 2320 if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
1216 goto cleanup; 2321 goto cleanup;
1217 2322
1218 error = ext4_xattr_ibody_set(inode, &i, &is); 2323 if (ext4_has_feature_ea_inode(inode->i_sb) &&
2324 (EXT4_XATTR_SIZE(i.value_len) >
2325 EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
2326 i.in_inode = 1;
2327retry_inode:
2328 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1219 if (!error && !bs.s.not_found) { 2329 if (!error && !bs.s.not_found) {
1220 i.value = NULL; 2330 i.value = NULL;
1221 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2331 error = ext4_xattr_block_set(handle, inode, &i, &bs);
@@ -1226,11 +2336,20 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1226 goto cleanup; 2336 goto cleanup;
1227 } 2337 }
1228 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2338 error = ext4_xattr_block_set(handle, inode, &i, &bs);
1229 if (error) 2339 if (!error && !is.s.not_found) {
1230 goto cleanup;
1231 if (!is.s.not_found) {
1232 i.value = NULL; 2340 i.value = NULL;
1233 error = ext4_xattr_ibody_set(inode, &i, &is); 2341 error = ext4_xattr_ibody_set(handle, inode, &i,
2342 &is);
2343 } else if (error == -ENOSPC) {
2344 /*
2345 * Xattr does not fit in the block, store at
2346 * external inode if possible.
2347 */
2348 if (ext4_has_feature_ea_inode(inode->i_sb) &&
2349 !i.in_inode) {
2350 i.in_inode = 1;
2351 goto retry_inode;
2352 }
1234 } 2353 }
1235 } 2354 }
1236 } 2355 }
@@ -1256,6 +2375,33 @@ cleanup:
1256 return error; 2375 return error;
1257} 2376}
1258 2377
2378int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
2379 bool is_create, int *credits)
2380{
2381 struct buffer_head *bh;
2382 int err;
2383
2384 *credits = 0;
2385
2386 if (!EXT4_SB(inode->i_sb)->s_journal)
2387 return 0;
2388
2389 down_read(&EXT4_I(inode)->xattr_sem);
2390
2391 bh = ext4_xattr_get_block(inode);
2392 if (IS_ERR(bh)) {
2393 err = PTR_ERR(bh);
2394 } else {
2395 *credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
2396 value_len, is_create);
2397 brelse(bh);
2398 err = 0;
2399 }
2400
2401 up_read(&EXT4_I(inode)->xattr_sem);
2402 return err;
2403}
2404
1259/* 2405/*
1260 * ext4_xattr_set() 2406 * ext4_xattr_set()
1261 * 2407 *
@@ -1269,13 +2415,20 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1269 const void *value, size_t value_len, int flags) 2415 const void *value, size_t value_len, int flags)
1270{ 2416{
1271 handle_t *handle; 2417 handle_t *handle;
2418 struct super_block *sb = inode->i_sb;
1272 int error, retries = 0; 2419 int error, retries = 0;
1273 int credits = ext4_jbd2_credits_xattr(inode); 2420 int credits;
1274 2421
1275 error = dquot_initialize(inode); 2422 error = dquot_initialize(inode);
1276 if (error) 2423 if (error)
1277 return error; 2424 return error;
2425
1278retry: 2426retry:
2427 error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE,
2428 &credits);
2429 if (error)
2430 return error;
2431
1279 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 2432 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1280 if (IS_ERR(handle)) { 2433 if (IS_ERR(handle)) {
1281 error = PTR_ERR(handle); 2434 error = PTR_ERR(handle);
@@ -1286,7 +2439,7 @@ retry:
1286 value, value_len, flags); 2439 value, value_len, flags);
1287 error2 = ext4_journal_stop(handle); 2440 error2 = ext4_journal_stop(handle);
1288 if (error == -ENOSPC && 2441 if (error == -ENOSPC &&
1289 ext4_should_retry_alloc(inode->i_sb, &retries)) 2442 ext4_should_retry_alloc(sb, &retries))
1290 goto retry; 2443 goto retry;
1291 if (error == 0) 2444 if (error == 0)
1292 error = error2; 2445 error = error2;
@@ -1311,7 +2464,7 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
1311 2464
1312 /* Adjust the value offsets of the entries */ 2465 /* Adjust the value offsets of the entries */
1313 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 2466 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1314 if (last->e_value_size) { 2467 if (!last->e_value_inum && last->e_value_size) {
1315 new_offs = le16_to_cpu(last->e_value_offs) + 2468 new_offs = le16_to_cpu(last->e_value_offs) +
1316 value_offs_shift; 2469 value_offs_shift;
1317 last->e_value_offs = cpu_to_le16(new_offs); 2470 last->e_value_offs = cpu_to_le16(new_offs);
@@ -1331,18 +2484,16 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
1331 struct ext4_xattr_ibody_find *is = NULL; 2484 struct ext4_xattr_ibody_find *is = NULL;
1332 struct ext4_xattr_block_find *bs = NULL; 2485 struct ext4_xattr_block_find *bs = NULL;
1333 char *buffer = NULL, *b_entry_name = NULL; 2486 char *buffer = NULL, *b_entry_name = NULL;
1334 size_t value_offs, value_size; 2487 size_t value_size = le32_to_cpu(entry->e_value_size);
1335 struct ext4_xattr_info i = { 2488 struct ext4_xattr_info i = {
1336 .value = NULL, 2489 .value = NULL,
1337 .value_len = 0, 2490 .value_len = 0,
1338 .name_index = entry->e_name_index, 2491 .name_index = entry->e_name_index,
2492 .in_inode = !!entry->e_value_inum,
1339 }; 2493 };
1340 struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); 2494 struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
1341 int error; 2495 int error;
1342 2496
1343 value_offs = le16_to_cpu(entry->e_value_offs);
1344 value_size = le32_to_cpu(entry->e_value_size);
1345
1346 is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); 2497 is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
1347 bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); 2498 bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
1348 buffer = kmalloc(value_size, GFP_NOFS); 2499 buffer = kmalloc(value_size, GFP_NOFS);
@@ -1358,7 +2509,15 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
1358 bs->bh = NULL; 2509 bs->bh = NULL;
1359 2510
1360 /* Save the entry name and the entry value */ 2511 /* Save the entry name and the entry value */
1361 memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); 2512 if (entry->e_value_inum) {
2513 error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
2514 if (error)
2515 goto out;
2516 } else {
2517 size_t value_offs = le16_to_cpu(entry->e_value_offs);
2518 memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
2519 }
2520
1362 memcpy(b_entry_name, entry->e_name, entry->e_name_len); 2521 memcpy(b_entry_name, entry->e_name, entry->e_name_len);
1363 b_entry_name[entry->e_name_len] = '\0'; 2522 b_entry_name[entry->e_name_len] = '\0';
1364 i.name = b_entry_name; 2523 i.name = b_entry_name;
@@ -1372,11 +2531,10 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
1372 goto out; 2531 goto out;
1373 2532
1374 /* Remove the chosen entry from the inode */ 2533 /* Remove the chosen entry from the inode */
1375 error = ext4_xattr_ibody_set(inode, &i, is); 2534 error = ext4_xattr_ibody_set(handle, inode, &i, is);
1376 if (error) 2535 if (error)
1377 goto out; 2536 goto out;
1378 2537
1379 i.name = b_entry_name;
1380 i.value = buffer; 2538 i.value = buffer;
1381 i.value_len = value_size; 2539 i.value_len = value_size;
1382 error = ext4_xattr_block_find(inode, &i, bs); 2540 error = ext4_xattr_block_find(inode, &i, bs);
@@ -1420,9 +2578,10 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
1420 last = IFIRST(header); 2578 last = IFIRST(header);
1421 /* Find the entry best suited to be pushed into EA block */ 2579 /* Find the entry best suited to be pushed into EA block */
1422 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 2580 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1423 total_size = 2581 total_size = EXT4_XATTR_LEN(last->e_name_len);
1424 EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + 2582 if (!last->e_value_inum)
1425 EXT4_XATTR_LEN(last->e_name_len); 2583 total_size += EXT4_XATTR_SIZE(
2584 le32_to_cpu(last->e_value_size));
1426 if (total_size <= bfree && 2585 if (total_size <= bfree &&
1427 total_size < min_total_size) { 2586 total_size < min_total_size) {
1428 if (total_size + ifree < isize_diff) { 2587 if (total_size + ifree < isize_diff) {
@@ -1441,8 +2600,10 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
1441 } 2600 }
1442 2601
1443 entry_size = EXT4_XATTR_LEN(entry->e_name_len); 2602 entry_size = EXT4_XATTR_LEN(entry->e_name_len);
1444 total_size = entry_size + 2603 total_size = entry_size;
1445 EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 2604 if (!entry->e_value_inum)
2605 total_size += EXT4_XATTR_SIZE(
2606 le32_to_cpu(entry->e_value_size));
1446 error = ext4_xattr_move_to_block(handle, inode, raw_inode, 2607 error = ext4_xattr_move_to_block(handle, inode, raw_inode,
1447 entry); 2608 entry);
1448 if (error) 2609 if (error)
@@ -1571,51 +2732,172 @@ cleanup:
1571 return error; 2732 return error;
1572} 2733}
1573 2734
2735#define EIA_INCR 16 /* must be 2^n */
2736#define EIA_MASK (EIA_INCR - 1)
1574 2737
2738/* Add the large xattr @inode into @ea_inode_array for deferred iput().
2739 * If @ea_inode_array is new or full, it will be grown and the old
2740 * contents copied over.
2741 */
2742static int
2743ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
2744 struct inode *inode)
2745{
2746 if (*ea_inode_array == NULL) {
2747 /*
2748 * Start with 15 inodes, so it fits into a power-of-two size.
2749 * If *ea_inode_array is NULL, this is essentially offsetof()
2750 */
2751 (*ea_inode_array) =
2752 kmalloc(offsetof(struct ext4_xattr_inode_array,
2753 inodes[EIA_MASK]),
2754 GFP_NOFS);
2755 if (*ea_inode_array == NULL)
2756 return -ENOMEM;
2757 (*ea_inode_array)->count = 0;
2758 } else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
2759 /* expand the array once all 15 + n * 16 slots are full */
2760 struct ext4_xattr_inode_array *new_array = NULL;
2761 int count = (*ea_inode_array)->count;
2762
2763 /* if new_array is NULL, this is essentially offsetof() */
2764 new_array = kmalloc(
2765 offsetof(struct ext4_xattr_inode_array,
2766 inodes[count + EIA_INCR]),
2767 GFP_NOFS);
2768 if (new_array == NULL)
2769 return -ENOMEM;
2770 memcpy(new_array, *ea_inode_array,
2771 offsetof(struct ext4_xattr_inode_array, inodes[count]));
2772 kfree(*ea_inode_array);
2773 *ea_inode_array = new_array;
2774 }
2775 (*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
2776 return 0;
2777}
1575 2778
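ext4_expand_inode_array() grows a structure that ends in a flexible array: the first allocation leaves room for EIA_MASK (15) pointers so that, together with the count field, the allocation size lands near a power of two, and each later step adds EIA_INCR (16) slots and copies the old contents over. The userspace re-creation below mirrors that pattern with generic names and malloc() in place of kmalloc():

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#define INCR 16			/* grow in chunks of 16 slots, like EIA_INCR */
#define MASK (INCR - 1)

struct ptr_array {
	unsigned int count;
	void *items[];		/* flexible array, like ext4_xattr_inode_array */
};

static int array_push(struct ptr_array **pa, void *item)
{
	struct ptr_array *a = *pa;

	if (!a) {
		/* First allocation: room for 15 items keeps the size compact. */
		a = malloc(offsetof(struct ptr_array, items[MASK]));
		if (!a)
			return -1;
		a->count = 0;
	} else if ((a->count & MASK) == MASK) {
		/* Full at 15, 31, 47, ... items: grow by another 16 slots. */
		struct ptr_array *na =
			malloc(offsetof(struct ptr_array, items[a->count + INCR]));

		if (!na)
			return -1;
		memcpy(na, a, offsetof(struct ptr_array, items[a->count]));
		free(a);
		a = na;
	}
	a->items[a->count++] = item;
	*pa = a;
	return 0;
}

int main(void)
{
	struct ptr_array *a = NULL;

	for (int i = 0; i < 40; i++)
		if (array_push(&a, NULL))
			return 1;
	return a->count != 40;	/* two growth steps happened along the way */
}
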
1576/* 2779/*
1577 * ext4_xattr_delete_inode() 2780 * ext4_xattr_delete_inode()
1578 * 2781 *
1579 * Free extended attribute resources associated with this inode. This 2782 * Free extended attribute resources associated with this inode. Traverse
1580 * is called immediately before an inode is freed. We have exclusive 2783 * all entries and decrement reference on any xattr inodes associated with this
1581 * access to the inode. 2784 * inode. This is called immediately before an inode is freed. We have exclusive
2785 * access to the inode. If an orphan inode is deleted it will also release its
2786 * references on xattr block and xattr inodes.
1582 */ 2787 */
1583void 2788int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
1584ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) 2789 struct ext4_xattr_inode_array **ea_inode_array,
2790 int extra_credits)
1585{ 2791{
1586 struct buffer_head *bh = NULL; 2792 struct buffer_head *bh = NULL;
2793 struct ext4_xattr_ibody_header *header;
2794 struct ext4_iloc iloc = { .bh = NULL };
2795 struct ext4_xattr_entry *entry;
2796 int error;
1587 2797
1588 if (!EXT4_I(inode)->i_file_acl) 2798 error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
1589 goto cleanup; 2799 NULL /* bh */,
1590 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 2800 false /* dirty */,
1591 if (!bh) { 2801 false /* block_csum */);
1592 EXT4_ERROR_INODE(inode, "block %llu read error", 2802 if (error) {
1593 EXT4_I(inode)->i_file_acl); 2803 EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
1594 goto cleanup; 2804 goto cleanup;
1595 } 2805 }
1596 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 2806
1597 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 2807 if (ext4_has_feature_ea_inode(inode->i_sb) &&
1598 EXT4_ERROR_INODE(inode, "bad block %llu", 2808 ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
1599 EXT4_I(inode)->i_file_acl); 2809
1600 goto cleanup; 2810 error = ext4_get_inode_loc(inode, &iloc);
2811 if (error) {
2812 EXT4_ERROR_INODE(inode, "inode loc (error %d)", error);
2813 goto cleanup;
2814 }
2815
2816 error = ext4_journal_get_write_access(handle, iloc.bh);
2817 if (error) {
2818 EXT4_ERROR_INODE(inode, "write access (error %d)",
2819 error);
2820 goto cleanup;
2821 }
2822
2823 header = IHDR(inode, ext4_raw_inode(&iloc));
2824 if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC))
2825 ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh,
2826 IFIRST(header),
2827 false /* block_csum */,
2828 ea_inode_array,
2829 extra_credits,
2830 false /* skip_quota */);
1601 } 2831 }
1602 ext4_xattr_release_block(handle, inode, bh);
1603 EXT4_I(inode)->i_file_acl = 0;
1604 2832
2833 if (EXT4_I(inode)->i_file_acl) {
2834 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
2835 if (!bh) {
2836 EXT4_ERROR_INODE(inode, "block %llu read error",
2837 EXT4_I(inode)->i_file_acl);
2838 error = -EIO;
2839 goto cleanup;
2840 }
2841 error = ext4_xattr_check_block(inode, bh);
2842 if (error) {
2843 EXT4_ERROR_INODE(inode, "bad block %llu (error %d)",
2844 EXT4_I(inode)->i_file_acl, error);
2845 goto cleanup;
2846 }
2847
2848 if (ext4_has_feature_ea_inode(inode->i_sb)) {
2849 for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
2850 entry = EXT4_XATTR_NEXT(entry))
2851 if (entry->e_value_inum)
2852 ext4_xattr_inode_free_quota(inode,
2853 le32_to_cpu(entry->e_value_size));
2854
2855 }
2856
2857 ext4_xattr_release_block(handle, inode, bh, ea_inode_array,
2858 extra_credits);
2859 /*
2860 * Update i_file_acl value in the same transaction that releases
2861 * block.
2862 */
2863 EXT4_I(inode)->i_file_acl = 0;
2864 error = ext4_mark_inode_dirty(handle, inode);
2865 if (error) {
2866 EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
2867 error);
2868 goto cleanup;
2869 }
2870 }
2871 error = 0;
1605cleanup: 2872cleanup:
2873 brelse(iloc.bh);
1606 brelse(bh); 2874 brelse(bh);
2875 return error;
2876}
2877
2878void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
2879{
2880 int idx;
2881
2882 if (ea_inode_array == NULL)
2883 return;
2884
2885 for (idx = 0; idx < ea_inode_array->count; ++idx)
2886 iput(ea_inode_array->inodes[idx]);
2887 kfree(ea_inode_array);
1607} 2888}
1608 2889
1609/* 2890/*
1610 * ext4_xattr_cache_insert() 2891 * ext4_xattr_block_cache_insert()
1611 * 2892 *
1612 * Create a new entry in the extended attribute cache, and insert 2893 * Create a new entry in the extended attribute block cache, and insert
1613 * it unless such an entry is already in the cache. 2894 * it unless such an entry is already in the cache.
1614 * 2895 *
1615 * Returns 0, or a negative error number on failure. 2896 * Returns 0, or a negative error number on failure.
1616 */ 2897 */
1617static void 2898static void
1618ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) 2899ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
2900 struct buffer_head *bh)
1619{ 2901{
1620 struct ext4_xattr_header *header = BHDR(bh); 2902 struct ext4_xattr_header *header = BHDR(bh);
1621 __u32 hash = le32_to_cpu(header->h_hash); 2903 __u32 hash = le32_to_cpu(header->h_hash);
@@ -1623,7 +2905,9 @@ ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1623 EXT4_XATTR_REFCOUNT_MAX; 2905 EXT4_XATTR_REFCOUNT_MAX;
1624 int error; 2906 int error;
1625 2907
1626 error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, 2908 if (!ea_block_cache)
2909 return;
2910 error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash,
1627 bh->b_blocknr, reusable); 2911 bh->b_blocknr, reusable);
1628 if (error) { 2912 if (error) {
1629 if (error == -EBUSY) 2913 if (error == -EBUSY)
@@ -1655,11 +2939,11 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
1655 entry1->e_name_index != entry2->e_name_index || 2939 entry1->e_name_index != entry2->e_name_index ||
1656 entry1->e_name_len != entry2->e_name_len || 2940 entry1->e_name_len != entry2->e_name_len ||
1657 entry1->e_value_size != entry2->e_value_size || 2941 entry1->e_value_size != entry2->e_value_size ||
2942 entry1->e_value_inum != entry2->e_value_inum ||
1658 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) 2943 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1659 return 1; 2944 return 1;
1660 if (entry1->e_value_block != 0 || entry2->e_value_block != 0) 2945 if (!entry1->e_value_inum &&
1661 return -EFSCORRUPTED; 2946 memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1662 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1663 (char *)header2 + le16_to_cpu(entry2->e_value_offs), 2947 (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1664 le32_to_cpu(entry1->e_value_size))) 2948 le32_to_cpu(entry1->e_value_size)))
1665 return 1; 2949 return 1;
@@ -1673,7 +2957,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
1673} 2957}
1674 2958
1675/* 2959/*
1676 * ext4_xattr_cache_find() 2960 * ext4_xattr_block_cache_find()
1677 * 2961 *
1678 * Find an identical extended attribute block. 2962 * Find an identical extended attribute block.
1679 * 2963 *
@@ -1681,30 +2965,33 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
1681 * not found or an error occurred. 2965 * not found or an error occurred.
1682 */ 2966 */
1683static struct buffer_head * 2967static struct buffer_head *
1684ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, 2968ext4_xattr_block_cache_find(struct inode *inode,
1685 struct mb_cache_entry **pce) 2969 struct ext4_xattr_header *header,
2970 struct mb_cache_entry **pce)
1686{ 2971{
1687 __u32 hash = le32_to_cpu(header->h_hash); 2972 __u32 hash = le32_to_cpu(header->h_hash);
1688 struct mb_cache_entry *ce; 2973 struct mb_cache_entry *ce;
1689 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 2974 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
1690 2975
2976 if (!ea_block_cache)
2977 return NULL;
1691 if (!header->h_hash) 2978 if (!header->h_hash)
1692 return NULL; /* never share */ 2979 return NULL; /* never share */
1693 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 2980 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1694 ce = mb_cache_entry_find_first(ext4_mb_cache, hash); 2981 ce = mb_cache_entry_find_first(ea_block_cache, hash);
1695 while (ce) { 2982 while (ce) {
1696 struct buffer_head *bh; 2983 struct buffer_head *bh;
1697 2984
1698 bh = sb_bread(inode->i_sb, ce->e_block); 2985 bh = sb_bread(inode->i_sb, ce->e_value);
1699 if (!bh) { 2986 if (!bh) {
1700 EXT4_ERROR_INODE(inode, "block %lu read error", 2987 EXT4_ERROR_INODE(inode, "block %lu read error",
1701 (unsigned long) ce->e_block); 2988 (unsigned long)ce->e_value);
1702 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { 2989 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
1703 *pce = ce; 2990 *pce = ce;
1704 return bh; 2991 return bh;
1705 } 2992 }
1706 brelse(bh); 2993 brelse(bh);
1707 ce = mb_cache_entry_find_next(ext4_mb_cache, ce); 2994 ce = mb_cache_entry_find_next(ea_block_cache, ce);
1708 } 2995 }
1709 return NULL; 2996 return NULL;
1710} 2997}
@@ -1717,30 +3004,22 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1717 * 3004 *
1718 * Compute the hash of an extended attribute. 3005 * Compute the hash of an extended attribute.
1719 */ 3006 */
1720static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, 3007static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
1721 struct ext4_xattr_entry *entry) 3008 size_t value_count)
1722{ 3009{
1723 __u32 hash = 0; 3010 __u32 hash = 0;
1724 char *name = entry->e_name;
1725 int n;
1726 3011
1727 for (n = 0; n < entry->e_name_len; n++) { 3012 while (name_len--) {
1728 hash = (hash << NAME_HASH_SHIFT) ^ 3013 hash = (hash << NAME_HASH_SHIFT) ^
1729 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 3014 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1730 *name++; 3015 *name++;
1731 } 3016 }
1732 3017 while (value_count--) {
1733 if (entry->e_value_size != 0) { 3018 hash = (hash << VALUE_HASH_SHIFT) ^
1734 __le32 *value = (__le32 *)((char *)header + 3019 (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1735 le16_to_cpu(entry->e_value_offs)); 3020 le32_to_cpu(*value++);
1736 for (n = (le32_to_cpu(entry->e_value_size) +
1737 EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
1738 hash = (hash << VALUE_HASH_SHIFT) ^
1739 (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1740 le32_to_cpu(*value++);
1741 }
1742 } 3021 }
1743 entry->e_hash = cpu_to_le32(hash); 3022 return cpu_to_le32(hash);
1744} 3023}
1745 3024
1746#undef NAME_HASH_SHIFT 3025#undef NAME_HASH_SHIFT
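The rework above turns the entry hash into a helper over an explicit (name, value-words) pair, so the same code can be fed either the inline value or, for EA-inode values, the single crc32c word used in ext4_xattr_set_entry(). Assuming the usual ext4 shift constants of 5 for name bytes and 16 for value words (defined earlier in this file), an equivalent standalone version looks like this:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define NAME_SHIFT	5	/* assumed NAME_HASH_SHIFT */
#define VALUE_SHIFT	16	/* assumed VALUE_HASH_SHIFT */

/* Hash the name bytes, then the value as a run of 32-bit words; the kernel
 * applies le32_to_cpu() to each word, which is a no-op on little endian. */
static uint32_t xattr_hash_entry(const char *name, size_t name_len,
				 const uint32_t *value, size_t value_count)
{
	uint32_t hash = 0;

	while (name_len--)
		hash = (hash << NAME_SHIFT) ^
		       (hash >> (8 * sizeof(hash) - NAME_SHIFT)) ^
		       *name++;
	while (value_count--)
		hash = (hash << VALUE_SHIFT) ^
		       (hash >> (8 * sizeof(hash) - VALUE_SHIFT)) ^
		       *value++;
	return hash;
}

int main(void)
{
	uint32_t crc_word = 0xdeadbeef;	/* placeholder for the crc32c of a big value */

	printf("0x%08x\n", xattr_hash_entry("user", 4, &crc_word, 1));
	return 0;
}
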
@@ -1753,13 +3032,11 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1753 * 3032 *
1754 * Re-compute the extended attribute hash value after an entry has changed. 3033 * Re-compute the extended attribute hash value after an entry has changed.
1755 */ 3034 */
1756static void ext4_xattr_rehash(struct ext4_xattr_header *header, 3035static void ext4_xattr_rehash(struct ext4_xattr_header *header)
1757 struct ext4_xattr_entry *entry)
1758{ 3036{
1759 struct ext4_xattr_entry *here; 3037 struct ext4_xattr_entry *here;
1760 __u32 hash = 0; 3038 __u32 hash = 0;
1761 3039
1762 ext4_xattr_hash_entry(header, entry);
1763 here = ENTRY(header+1); 3040 here = ENTRY(header+1);
1764 while (!IS_LAST_ENTRY(here)) { 3041 while (!IS_LAST_ENTRY(here)) {
1765 if (!here->e_hash) { 3042 if (!here->e_hash) {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 099c8b670ef5..0d2dde1fa87a 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -44,7 +44,7 @@ struct ext4_xattr_entry {
44 __u8 e_name_len; /* length of name */ 44 __u8 e_name_len; /* length of name */
45 __u8 e_name_index; /* attribute name index */ 45 __u8 e_name_index; /* attribute name index */
46 __le16 e_value_offs; /* offset in disk block of value */ 46 __le16 e_value_offs; /* offset in disk block of value */
47 __le32 e_value_block; /* disk block attribute is stored on (n/i) */ 47 __le32 e_value_inum; /* inode in which the value is stored */
48 __le32 e_value_size; /* size of attribute value */ 48 __le32 e_value_size; /* size of attribute value */
49 __le32 e_hash; /* hash value of name and value */ 49 __le32 e_hash; /* hash value of name and value */
50 char e_name[0]; /* attribute name */ 50 char e_name[0]; /* attribute name */
@@ -69,6 +69,13 @@ struct ext4_xattr_entry {
69 EXT4_I(inode)->i_extra_isize)) 69 EXT4_I(inode)->i_extra_isize))
70#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 70#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
71 71
72/*
73 * The minimum size of an EA value at which it is stored in an external inode:
74 * block size - header size - size of one entry - 4 null bytes
75 */
76#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
77 ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
78
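Plugging rough numbers into EXT4_XATTR_MIN_LARGE_EA_SIZE() shows how high the threshold sits: a value has to come within a few dozen bytes of a full block before it is pushed out to an EA inode. The overhead constants in this sketch are assumptions for illustration (about 32 bytes of block header plus roughly 20 bytes for a minimal entry), not the exact kernel sizes:

#include <stdio.h>

#define HDR_SIZE	32	/* assumed sizeof(struct ext4_xattr_header) */
#define ENTRY_LEN3	20	/* assumed EXT4_XATTR_LEN(3) */

int main(void)
{
	unsigned int bs[] = { 1024, 4096 };
	unsigned int i;

	for (i = 0; i < 2; i++)
		printf("block size %u: values above ~%u bytes go to an EA inode\n",
		       bs[i], bs[i] - ENTRY_LEN3 - HDR_SIZE - 4);
	return 0;
}
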
72#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) 79#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
73#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) 80#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
74#define BFIRST(bh) ENTRY(BHDR(bh)+1) 81#define BFIRST(bh) ENTRY(BHDR(bh)+1)
@@ -77,10 +84,11 @@ struct ext4_xattr_entry {
77#define EXT4_ZERO_XATTR_VALUE ((void *)-1) 84#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
78 85
79struct ext4_xattr_info { 86struct ext4_xattr_info {
80 int name_index;
81 const char *name; 87 const char *name;
82 const void *value; 88 const void *value;
83 size_t value_len; 89 size_t value_len;
90 int name_index;
91 int in_inode;
84}; 92};
85 93
86struct ext4_xattr_search { 94struct ext4_xattr_search {
@@ -96,6 +104,11 @@ struct ext4_xattr_ibody_find {
96 struct ext4_iloc iloc; 104 struct ext4_iloc iloc;
97}; 105};
98 106
107struct ext4_xattr_inode_array {
108 unsigned int count; /* # of used items in the array */
109 struct inode *inodes[0];
110};
111
99extern const struct xattr_handler ext4_xattr_user_handler; 112extern const struct xattr_handler ext4_xattr_user_handler;
100extern const struct xattr_handler ext4_xattr_trusted_handler; 113extern const struct xattr_handler ext4_xattr_trusted_handler;
101extern const struct xattr_handler ext4_xattr_security_handler; 114extern const struct xattr_handler ext4_xattr_security_handler;
@@ -139,8 +152,16 @@ extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
+extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
+                                  bool is_create, int *credits);
+extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+                                    struct buffer_head *block_bh, size_t value_len,
+                                    bool is_create);
 
-extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+                                   struct ext4_xattr_inode_array **array,
+                                   int extra_credits);
+extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
 
 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
                                       struct ext4_inode *raw_inode, handle_t *handle);
@@ -169,3 +190,11 @@ static inline int ext4_init_security(handle_t *handle, struct inode *inode,
         return 0;
 }
 #endif
+
+#ifdef CONFIG_LOCKDEP
+extern void ext4_xattr_inode_set_class(struct inode *ea_inode);
+#else
+static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { }
+#endif
+
+extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
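The delete path now hands the referenced EA inodes back to the caller in an ext4_xattr_inode_array instead of dropping them inline. Below is a minimal kernel-context sketch of the caller pattern as I read the new declarations above; the surrounding eviction logic and the function name are illustrative, not part of this header, and the assumption is that the array is released only after the journal handle is done with.

/* Illustrative caller of the new delete-inode interface (sketch only). */
static int example_drop_inode_xattrs(handle_t *handle, struct inode *inode,
                                     int extra_credits)
{
        struct ext4_xattr_inode_array *ea_inode_array = NULL;
        int err;

        /* Collects referenced EA inodes into ea_inode_array. */
        err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
                                      extra_credits);

        /* ...the real caller would stop the journal handle around here... */

        /* Frees the array and drops the collected EA inode references. */
        if (ea_inode_array)
                ext4_xattr_inode_array_free(ea_inode_array);
        return err;
}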
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b19be429d655..d818fd236787 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -10,13 +10,14 @@
 /*
  * Mbcache is a simple key-value store. Keys need not be unique, however
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete_block()).
+ * mb_cache_entry_delete()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
- * They use hash of a block contents as a key and block number as a value.
- * That's why keys need not be unique (different xattr blocks may end up having
- * the same hash). However block number always uniquely identifies a cache
- * entry.
+ * Ext4 also uses it for deduplication of xattr values stored in inodes.
+ * They use hash of data as a key and provide a value that may represent a
+ * block or inode number. That's why keys need not be unique (hash of different
+ * data may be the same). However user provided value always uniquely
+ * identifies a cache entry.
  *
  * We provide functions for creation and removal of entries, search by key,
  * and a special "delete entry with given key-value pair" operation. Fixed
@@ -62,15 +63,15 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
  * @cache - cache where the entry should be created
  * @mask - gfp mask with which the entry should be allocated
  * @key - key of the entry
- * @block - block that contains data
- * @reusable - is the block reusable by other inodes?
+ * @value - value of the entry
+ * @reusable - is the entry reusable by others?
  *
- * Creates entry in @cache with key @key and records that data is stored in
- * block @block. The function returns -EBUSY if entry with the same key
- * and for the same block already exists in cache. Otherwise 0 is returned.
+ * Creates entry in @cache with key @key and value @value. The function returns
+ * -EBUSY if entry with the same key and value already exists in cache.
+ * Otherwise 0 is returned.
  */
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
-                          sector_t block, bool reusable)
+                          u64 value, bool reusable)
 {
         struct mb_cache_entry *entry, *dup;
         struct hlist_bl_node *dup_node;
@@ -91,12 +92,12 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
         /* One ref for hash, one ref returned */
         atomic_set(&entry->e_refcnt, 1);
         entry->e_key = key;
-        entry->e_block = block;
+        entry->e_value = value;
         entry->e_reusable = reusable;
         head = mb_cache_entry_head(cache, key);
         hlist_bl_lock(head);
         hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
-                if (dup->e_key == key && dup->e_block == block) {
+                if (dup->e_key == key && dup->e_value == value) {
                         hlist_bl_unlock(head);
                         kmem_cache_free(mb_entry_cache, entry);
                         return -EBUSY;
@@ -187,13 +188,13 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 
 /*
- * mb_cache_entry_get - get a cache entry by block number (and key)
+ * mb_cache_entry_get - get a cache entry by value (and key)
  * @cache - cache we work with
- * @key - key of block number @block
- * @block - block number
+ * @key - key
+ * @value - value
  */
 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
-                                          sector_t block)
+                                          u64 value)
 {
         struct hlist_bl_node *node;
         struct hlist_bl_head *head;
@@ -202,7 +203,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
         head = mb_cache_entry_head(cache, key);
         hlist_bl_lock(head);
         hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-                if (entry->e_key == key && entry->e_block == block) {
+                if (entry->e_key == key && entry->e_value == value) {
                         atomic_inc(&entry->e_refcnt);
                         goto out;
                 }
@@ -214,15 +215,14 @@ out:
 }
 EXPORT_SYMBOL(mb_cache_entry_get);
 
-/* mb_cache_entry_delete_block - remove information about block from cache
+/* mb_cache_entry_delete - remove a cache entry
  * @cache - cache we work with
- * @key - key of block @block
- * @block - block number
+ * @key - key
+ * @value - value
  *
- * Remove entry from cache @cache with key @key with data stored in @block.
+ * Remove entry from cache @cache with key @key and value @value.
  */
-void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
-                                 sector_t block)
+void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
 {
         struct hlist_bl_node *node;
         struct hlist_bl_head *head;
@@ -231,7 +231,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
         head = mb_cache_entry_head(cache, key);
         hlist_bl_lock(head);
         hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-                if (entry->e_key == key && entry->e_block == block) {
+                if (entry->e_key == key && entry->e_value == value) {
                         /* We keep hash list reference to keep entry alive */
                         hlist_bl_del_init(&entry->e_hash_list);
                         hlist_bl_unlock(head);
@@ -248,7 +248,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
         }
         hlist_bl_unlock(head);
 }
-EXPORT_SYMBOL(mb_cache_entry_delete_block);
+EXPORT_SYMBOL(mb_cache_entry_delete);
 
 /* mb_cache_entry_touch - cache entry got used
  * @cache - cache the entry belongs to
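With the switch from sector_t block numbers to an opaque u64 value, the same three calls now cover both xattr block numbers and EA inode numbers. The kernel-context sketch below exercises the changed API exactly as declared in this file; the cache pointer, hash and EA inode number are illustrative placeholders, not taken from the patch.

#include <linux/mbcache.h>

static struct mb_cache *ea_inode_cache;   /* e.g. created with mb_cache_create() */

static int example_remember_value(u32 hash, u64 ea_ino)
{
        int err;

        /* Key is a hash of the data; the u64 value here is an inode number. */
        err = mb_cache_entry_create(ea_inode_cache, GFP_NOFS, hash, ea_ino, true);
        if (err == -EBUSY)
                err = 0;        /* identical (key, value) pair already cached */
        return err;
}

static bool example_value_is_cached(u32 hash, u64 ea_ino)
{
        struct mb_cache_entry *entry;

        entry = mb_cache_entry_get(ea_inode_cache, hash, ea_ino);
        if (!entry)
                return false;
        mb_cache_entry_put(ea_inode_cache, entry);
        return true;
}

static void example_forget_value(u32 hash, u64 ea_ino)
{
        /* Removes exactly the (key, value) pair recorded above. */
        mb_cache_entry_delete(ea_inode_cache, hash, ea_ino);
}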
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 48813aeaab80..53a17496c5c5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1910,6 +1910,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 {
         qsize_t space, cur_space;
         qsize_t rsv_space = 0;
+        qsize_t inode_usage = 1;
         struct dquot *transfer_from[MAXQUOTAS] = {};
         int cnt, ret = 0;
         char is_valid[MAXQUOTAS] = {};
@@ -1919,6 +1920,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 
         if (IS_NOQUOTA(inode))
                 return 0;
+
+        if (inode->i_sb->dq_op->get_inode_usage) {
+                ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage);
+                if (ret)
+                        return ret;
+        }
+
         /* Initialize the arrays */
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                 warn_to[cnt].w_type = QUOTA_NL_NOWARN;
@@ -1946,7 +1954,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
                         continue;
                 is_valid[cnt] = 1;
                 transfer_from[cnt] = i_dquot(inode)[cnt];
-                ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
+                ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]);
                 if (ret)
                         goto over_quota;
                 ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
@@ -1963,7 +1971,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
                 /* Due to IO error we might not have transfer_from[] structure */
                 if (transfer_from[cnt]) {
                         int wtype;
-                        wtype = info_idq_free(transfer_from[cnt], 1);
+                        wtype = info_idq_free(transfer_from[cnt], inode_usage);
                         if (wtype != QUOTA_NL_NOWARN)
                                 prepare_warning(&warn_from_inodes[cnt],
                                                 transfer_from[cnt], wtype);
@@ -1971,13 +1979,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
                         if (wtype != QUOTA_NL_NOWARN)
                                 prepare_warning(&warn_from_space[cnt],
                                                 transfer_from[cnt], wtype);
-                        dquot_decr_inodes(transfer_from[cnt], 1);
+                        dquot_decr_inodes(transfer_from[cnt], inode_usage);
                         dquot_decr_space(transfer_from[cnt], cur_space);
                         dquot_free_reserved_space(transfer_from[cnt],
                                                   rsv_space);
                 }
 
-                dquot_incr_inodes(transfer_to[cnt], 1);
+                dquot_incr_inodes(transfer_to[cnt], inode_usage);
                 dquot_incr_space(transfer_to[cnt], cur_space);
                 dquot_resv_space(transfer_to[cnt], rsv_space);
 
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
index e3e1208e0f54..97f738628b36 100644
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -83,6 +83,9 @@ struct fscrypt_operations {
         unsigned (*max_namelen)(struct inode *);
 };
 
+/* Maximum value for the third parameter of fscrypt_operations.set_context(). */
+#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28
+
 static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
 {
         if (inode->i_sb->s_cop->dummy_context &&
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 86c9a8b480c5..e1bc73414983 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -19,15 +19,15 @@ struct mb_cache_entry {
         u32 e_key;
         u32 e_referenced:1;
         u32 e_reusable:1;
-        /* Block number of hashed block - stable during lifetime of the entry */
-        sector_t e_block;
+        /* User provided value - stable during lifetime of the entry */
+        u64 e_value;
 };
 
 struct mb_cache *mb_cache_create(int bucket_bits);
 void mb_cache_destroy(struct mb_cache *cache);
 
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
-                          sector_t block, bool reusable);
+                          u64 value, bool reusable);
 void __mb_cache_entry_free(struct mb_cache_entry *entry);
 static inline int mb_cache_entry_put(struct mb_cache *cache,
                                      struct mb_cache_entry *entry)
@@ -38,10 +38,9 @@ static inline int mb_cache_entry_put(struct mb_cache *cache,
         return 1;
 }
 
-void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
-                                 sector_t block);
+void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
-                                          sector_t block);
+                                          u64 value);
 struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
                                                  u32 key);
 struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 3434eef2a5aa..bfd077ca6ac3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -332,6 +332,8 @@ struct dquot_operations {
                          * quota code only */
         qsize_t *(*get_reserved_space) (struct inode *);
         int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */
+        /* Get number of inodes that were charged for a given inode */
+        int (*get_inode_usage) (struct inode *, qsize_t *);
         /* Get next ID with active quota structure */
         int (*get_next_id) (struct super_block *sb, struct kqid *qid);
 };
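A short sketch of how a filesystem opts in to the new callback; the operation name and the reported usage below are illustrative only (ext4 wires its own ext4_get_inode_usage(), declared in fs/ext4/xattr.h above, which also counts referenced xattr inodes), and filesystems that leave the callback NULL keep the old per-inode charge of 1 in __dquot_transfer().

#include <linux/quota.h>

/* Illustrative callback: charge just the inode itself. */
static int example_get_inode_usage(struct inode *inode, qsize_t *usage)
{
        *usage = 1;     /* ext4 would report 1 + number of xattr inodes */
        return 0;
}

static const struct dquot_operations example_dquot_operations = {
        /* ...the filesystem's usual dquot operations go here... */
        .get_inode_usage = example_get_inode_usage,
};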