summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author Tahsin Erdogan <tahsin@google.com> 2017-06-22 11:44:55 -0400
committer Theodore Ts'o <tytso@mit.edu> 2017-06-22 11:44:55 -0400
commit dec214d00e0d78a08b947d7dccdfdb84407a9f4d (patch)
tree d0912312aa9d0deb0b6544445859e09090c1d404 /fs/ext4
parent 30a7eb970c3aae6f1b74b2edea896fdca1cbea38 (diff)
ext4: xattr inode deduplication
Ext4 now supports xattr values that are up to 64k in size (the VFS limit). Large xattr values are stored in external inodes, each one holding a single value. Once written, the data blocks of these inodes are immutable. Real-world use cases are expected to have a lot of value duplication, such as inherited ACLs. To reduce data duplication on disk, this patch implements a deduplicator that allows sharing of xattr inodes. The deduplication is based on an in-memory hash lookup, which makes it a best-effort sharing scheme. When an xattr inode is read from disk (i.e. on a getxattr() call), its crc32c hash is added to a hash table. Before creating a new xattr inode for a value being set, the hash table is checked to see if an existing inode holds an identical value. If such an inode is found, the ref count on that inode is incremented. On value removal, the ref count is decremented, and if it reaches zero the inode is deleted. The quota charging for such inodes is manually managed. Every reference holder is charged the full size, as if there were no sharing happening. This is consistent with how xattr blocks are also charged. [ Fixed up journal credits calculation to handle inline data and the rare case where a shared xattr block can get freed when two threads race on breaking the xattr block sharing. --tytso ] Signed-off-by: Tahsin Erdogan <tahsin@google.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/acl.c5
-rw-r--r--fs/ext4/ext4.h23
-rw-r--r--fs/ext4/inode.c13
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/ext4/xattr.c1052
-rw-r--r--fs/ext4/xattr.h17
6 files changed, 857 insertions, 290 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 74f7ac539e00..8db03e5c78bc 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,7 +238,10 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
238 if (error) 238 if (error)
239 return error; 239 return error;
240retry: 240retry:
241 credits = ext4_xattr_set_credits(inode, acl_size); 241 error = ext4_xattr_set_credits(inode, acl_size, &credits);
242 if (error)
243 return error;
244
242 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 245 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
243 if (IS_ERR(handle)) 246 if (IS_ERR(handle))
244 return PTR_ERR(handle); 247 return PTR_ERR(handle);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 09983c774d31..fe92a63c86cb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1517,6 +1517,7 @@ struct ext4_sb_info {
1517 long s_es_nr_inode; 1517 long s_es_nr_inode;
1518 struct ext4_es_stats s_es_stats; 1518 struct ext4_es_stats s_es_stats;
1519 struct mb_cache *s_ea_block_cache; 1519 struct mb_cache *s_ea_block_cache;
1520 struct mb_cache *s_ea_inode_cache;
1520 spinlock_t s_es_lock ____cacheline_aligned_in_smp; 1521 spinlock_t s_es_lock ____cacheline_aligned_in_smp;
1521 1522
1522 /* Ratelimit ext4 messages. */ 1523 /* Ratelimit ext4 messages. */
@@ -2100,7 +2101,11 @@ static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
2100 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); 2101 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
2101} 2102}
2102 2103
2103#define ext4_is_quota_file(inode) IS_NOQUOTA(inode) 2104static inline bool ext4_is_quota_file(struct inode *inode)
2105{
2106 return IS_NOQUOTA(inode) &&
2107 !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL);
2108}
2104 2109
2105/* 2110/*
2106 * This structure is stuffed into the struct file's private_data field 2111 * This structure is stuffed into the struct file's private_data field
@@ -2493,7 +2498,6 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2493extern void ext4_set_inode_flags(struct inode *); 2498extern void ext4_set_inode_flags(struct inode *);
2494extern int ext4_alloc_da_blocks(struct inode *inode); 2499extern int ext4_alloc_da_blocks(struct inode *inode);
2495extern void ext4_set_aops(struct inode *inode); 2500extern void ext4_set_aops(struct inode *inode);
2496extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
2497extern int ext4_writepage_trans_blocks(struct inode *); 2501extern int ext4_writepage_trans_blocks(struct inode *);
2498extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 2502extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
2499extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 2503extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
@@ -2720,19 +2724,20 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
2720extern int ext4_register_li_request(struct super_block *sb, 2724extern int ext4_register_li_request(struct super_block *sb,
2721 ext4_group_t first_not_zeroed); 2725 ext4_group_t first_not_zeroed);
2722 2726
2723static inline int ext4_has_group_desc_csum(struct super_block *sb)
2724{
2725 return ext4_has_feature_gdt_csum(sb) ||
2726 EXT4_SB(sb)->s_chksum_driver != NULL;
2727}
2728
2729static inline int ext4_has_metadata_csum(struct super_block *sb) 2727static inline int ext4_has_metadata_csum(struct super_block *sb)
2730{ 2728{
2731 WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) && 2729 WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
2732 !EXT4_SB(sb)->s_chksum_driver); 2730 !EXT4_SB(sb)->s_chksum_driver);
2733 2731
2734 return (EXT4_SB(sb)->s_chksum_driver != NULL); 2732 return ext4_has_feature_metadata_csum(sb) &&
2733 (EXT4_SB(sb)->s_chksum_driver != NULL);
2735} 2734}
2735
2736static inline int ext4_has_group_desc_csum(struct super_block *sb)
2737{
2738 return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
2739}
2740
2736static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 2741static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
2737{ 2742{
2738 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | 2743 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 46def73d3472..962f28a0e176 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -139,6 +139,8 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
139 unsigned int length); 139 unsigned int length);
140static int __ext4_journalled_writepage(struct page *page, unsigned int len); 140static int __ext4_journalled_writepage(struct page *page, unsigned int len);
141static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 141static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
142static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
143 int pextents);
142 144
143/* 145/*
144 * Test whether an inode is a fast symlink. 146 * Test whether an inode is a fast symlink.
@@ -4843,8 +4845,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4843 } 4845 }
4844 brelse(iloc.bh); 4846 brelse(iloc.bh);
4845 ext4_set_inode_flags(inode); 4847 ext4_set_inode_flags(inode);
4846 if (ei->i_flags & EXT4_EA_INODE_FL) 4848
4849 if (ei->i_flags & EXT4_EA_INODE_FL) {
4847 ext4_xattr_inode_set_class(inode); 4850 ext4_xattr_inode_set_class(inode);
4851
4852 inode_lock(inode);
4853 inode->i_flags |= S_NOQUOTA;
4854 inode_unlock(inode);
4855 }
4856
4848 unlock_new_inode(inode); 4857 unlock_new_inode(inode);
4849 return inode; 4858 return inode;
4850 4859
@@ -5503,7 +5512,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
5503 * 5512 *
5504 * Also account for superblock, inode, quota and xattr blocks 5513 * Also account for superblock, inode, quota and xattr blocks
5505 */ 5514 */
5506int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 5515static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
5507 int pextents) 5516 int pextents)
5508{ 5517{
5509 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 5518 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 380389740575..d501f8256dc4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -927,6 +927,10 @@ static void ext4_put_super(struct super_block *sb)
927 invalidate_bdev(sbi->journal_bdev); 927 invalidate_bdev(sbi->journal_bdev);
928 ext4_blkdev_remove(sbi); 928 ext4_blkdev_remove(sbi);
929 } 929 }
930 if (sbi->s_ea_inode_cache) {
931 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
932 sbi->s_ea_inode_cache = NULL;
933 }
930 if (sbi->s_ea_block_cache) { 934 if (sbi->s_ea_block_cache) {
931 ext4_xattr_destroy_cache(sbi->s_ea_block_cache); 935 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
932 sbi->s_ea_block_cache = NULL; 936 sbi->s_ea_block_cache = NULL;
@@ -1178,7 +1182,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1178 if (res) 1182 if (res)
1179 return res; 1183 return res;
1180retry: 1184retry:
1181 credits = ext4_xattr_set_credits(inode, len); 1185 res = ext4_xattr_set_credits(inode, len, &credits);
1186 if (res)
1187 return res;
1188
1182 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); 1189 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
1183 if (IS_ERR(handle)) 1190 if (IS_ERR(handle))
1184 return PTR_ERR(handle); 1191 return PTR_ERR(handle);
@@ -3445,7 +3452,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3445 } 3452 }
3446 3453
3447 /* Load the checksum driver */ 3454 /* Load the checksum driver */
3448 if (ext4_has_feature_metadata_csum(sb)) { 3455 if (ext4_has_feature_metadata_csum(sb) ||
3456 ext4_has_feature_ea_inode(sb)) {
3449 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3457 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3450 if (IS_ERR(sbi->s_chksum_driver)) { 3458 if (IS_ERR(sbi->s_chksum_driver)) {
3451 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 3459 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
@@ -3467,7 +3475,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3467 /* Precompute checksum seed for all metadata */ 3475 /* Precompute checksum seed for all metadata */
3468 if (ext4_has_feature_csum_seed(sb)) 3476 if (ext4_has_feature_csum_seed(sb))
3469 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); 3477 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3470 else if (ext4_has_metadata_csum(sb)) 3478 else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
3471 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3479 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3472 sizeof(es->s_uuid)); 3480 sizeof(es->s_uuid));
3473 3481
@@ -3597,6 +3605,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3597 "The Hurd can't support 64-bit file systems"); 3605 "The Hurd can't support 64-bit file systems");
3598 goto failed_mount; 3606 goto failed_mount;
3599 } 3607 }
3608
3609 /*
3610 * ea_inode feature uses l_i_version field which is not
3611 * available in HURD_COMPAT mode.
3612 */
3613 if (ext4_has_feature_ea_inode(sb)) {
3614 ext4_msg(sb, KERN_ERR,
3615 "ea_inode feature is not supported for Hurd");
3616 goto failed_mount;
3617 }
3600 } 3618 }
3601 3619
3602 if (IS_EXT2_SB(sb)) { 3620 if (IS_EXT2_SB(sb)) {
@@ -4067,6 +4085,15 @@ no_journal:
4067 goto failed_mount_wq; 4085 goto failed_mount_wq;
4068 } 4086 }
4069 4087
4088 if (ext4_has_feature_ea_inode(sb)) {
4089 sbi->s_ea_inode_cache = ext4_xattr_create_cache();
4090 if (!sbi->s_ea_inode_cache) {
4091 ext4_msg(sb, KERN_ERR,
4092 "Failed to create ea_inode_cache");
4093 goto failed_mount_wq;
4094 }
4095 }
4096
4070 if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && 4097 if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
4071 (blocksize != PAGE_SIZE)) { 4098 (blocksize != PAGE_SIZE)) {
4072 ext4_msg(sb, KERN_ERR, 4099 ext4_msg(sb, KERN_ERR,
@@ -4296,6 +4323,10 @@ failed_mount4:
4296 if (EXT4_SB(sb)->rsv_conversion_wq) 4323 if (EXT4_SB(sb)->rsv_conversion_wq)
4297 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4324 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4298failed_mount_wq: 4325failed_mount_wq:
4326 if (sbi->s_ea_inode_cache) {
4327 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
4328 sbi->s_ea_inode_cache = NULL;
4329 }
4299 if (sbi->s_ea_block_cache) { 4330 if (sbi->s_ea_block_cache) {
4300 ext4_xattr_destroy_cache(sbi->s_ea_block_cache); 4331 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
4301 sbi->s_ea_block_cache = NULL; 4332 sbi->s_ea_block_cache = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 649dc2953901..a4c8fe3692a2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -108,6 +108,9 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
108#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \ 108#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
109 inode->i_sb->s_fs_info)->s_ea_block_cache) 109 inode->i_sb->s_fs_info)->s_ea_block_cache)
110 110
111#define EA_INODE_CACHE(inode) (((struct ext4_sb_info *) \
112 inode->i_sb->s_fs_info)->s_ea_inode_cache)
113
111static int 114static int
112ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array, 115ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
113 struct inode *inode); 116 struct inode *inode);
@@ -280,15 +283,44 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
280 return cmp ? -ENODATA : 0; 283 return cmp ? -ENODATA : 0;
281} 284}
282 285
286static u32
287ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
288{
289 return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
290}
291
292static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
293{
294 return ((u64)ea_inode->i_ctime.tv_sec << 32) |
295 ((u32)ea_inode->i_version);
296}
297
298static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
299{
300 ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
301 ea_inode->i_version = (u32)ref_count;
302}
303
304static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode)
305{
306 return (u32)ea_inode->i_atime.tv_sec;
307}
308
309static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
310{
311 ea_inode->i_atime.tv_sec = hash;
312}
313
283/* 314/*
284 * Read the EA value from an inode. 315 * Read the EA value from an inode.
285 */ 316 */
286static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) 317static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
287{ 318{
288 unsigned long block = 0; 319 unsigned long block = 0;
289 struct buffer_head *bh = NULL; 320 struct buffer_head *bh;
290 int blocksize = ea_inode->i_sb->s_blocksize; 321 int blocksize = ea_inode->i_sb->s_blocksize;
291 size_t csize, copied = 0; 322 size_t csize, copied = 0;
323 void *copy_pos = buf;
292 324
293 while (copied < size) { 325 while (copied < size) {
294 csize = (size - copied) > blocksize ? blocksize : size - copied; 326 csize = (size - copied) > blocksize ? blocksize : size - copied;
@@ -298,10 +330,10 @@ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
298 if (!bh) 330 if (!bh)
299 return -EFSCORRUPTED; 331 return -EFSCORRUPTED;
300 332
301 memcpy(buf, bh->b_data, csize); 333 memcpy(copy_pos, bh->b_data, csize);
302 brelse(bh); 334 brelse(bh);
303 335
304 buf += csize; 336 copy_pos += csize;
305 block += 1; 337 block += 1;
306 copied += csize; 338 copied += csize;
307 } 339 }
@@ -317,29 +349,24 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
317 inode = ext4_iget(parent->i_sb, ea_ino); 349 inode = ext4_iget(parent->i_sb, ea_ino);
318 if (IS_ERR(inode)) { 350 if (IS_ERR(inode)) {
319 err = PTR_ERR(inode); 351 err = PTR_ERR(inode);
320 ext4_error(parent->i_sb, "error while reading EA inode %lu " 352 ext4_error(parent->i_sb,
321 "err=%d", ea_ino, err); 353 "error while reading EA inode %lu err=%d", ea_ino,
354 err);
322 return err; 355 return err;
323 } 356 }
324 357
325 if (is_bad_inode(inode)) { 358 if (is_bad_inode(inode)) {
326 ext4_error(parent->i_sb, "error while reading EA inode %lu " 359 ext4_error(parent->i_sb,
327 "is_bad_inode", ea_ino); 360 "error while reading EA inode %lu is_bad_inode",
361 ea_ino);
328 err = -EIO; 362 err = -EIO;
329 goto error; 363 goto error;
330 } 364 }
331 365
332 if (EXT4_XATTR_INODE_GET_PARENT(inode) != parent->i_ino ||
333 inode->i_generation != parent->i_generation) {
334 ext4_error(parent->i_sb, "Backpointer from EA inode %lu "
335 "to parent is invalid.", ea_ino);
336 err = -EINVAL;
337 goto error;
338 }
339
340 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { 366 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
341 ext4_error(parent->i_sb, "EA inode %lu does not have " 367 ext4_error(parent->i_sb,
342 "EXT4_EA_INODE_FL flag set.\n", ea_ino); 368 "EA inode %lu does not have EXT4_EA_INODE_FL flag",
369 ea_ino);
343 err = -EINVAL; 370 err = -EINVAL;
344 goto error; 371 goto error;
345 } 372 }
@@ -351,6 +378,20 @@ error:
351 return err; 378 return err;
352} 379}
353 380
381static int
382ext4_xattr_inode_verify_hash(struct inode *ea_inode, void *buffer, size_t size)
383{
384 u32 hash;
385
386 /* Verify stored hash matches calculated hash. */
387 hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size);
388 if (hash != ext4_xattr_inode_get_hash(ea_inode))
389 return -EFSCORRUPTED;
390 return 0;
391}
392
393#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
394
354/* 395/*
355 * Read the value from the EA inode. 396 * Read the value from the EA inode.
356 */ 397 */
@@ -358,17 +399,53 @@ static int
358ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer, 399ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer,
359 size_t size) 400 size_t size)
360{ 401{
402 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
361 struct inode *ea_inode; 403 struct inode *ea_inode;
362 int ret; 404 int err;
363 405
364 ret = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); 406 err = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
365 if (ret) 407 if (err) {
366 return ret; 408 ea_inode = NULL;
409 goto out;
410 }
367 411
368 ret = ext4_xattr_inode_read(ea_inode, buffer, size); 412 if (i_size_read(ea_inode) != size) {
369 iput(ea_inode); 413 ext4_warning_inode(ea_inode,
414 "ea_inode file size=%llu entry size=%zu",
415 i_size_read(ea_inode), size);
416 err = -EFSCORRUPTED;
417 goto out;
418 }
370 419
371 return ret; 420 err = ext4_xattr_inode_read(ea_inode, buffer, size);
421 if (err)
422 goto out;
423
424 err = ext4_xattr_inode_verify_hash(ea_inode, buffer, size);
425 /*
426 * Compatibility check for old Lustre ea_inode implementation. Old
427 * version does not have hash validation, but it has a backpointer
428 * from ea_inode to the parent inode.
429 */
430 if (err == -EFSCORRUPTED) {
431 if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
432 ea_inode->i_generation != inode->i_generation) {
433 ext4_warning_inode(ea_inode,
434 "EA inode hash validation failed");
435 goto out;
436 }
437 /* Do not add ea_inode to the cache. */
438 ea_inode_cache = NULL;
439 } else if (err)
440 goto out;
441
442 if (ea_inode_cache)
443 mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
444 ext4_xattr_inode_get_hash(ea_inode),
445 ea_inode->i_ino, true /* reusable */);
446out:
447 iput(ea_inode);
448 return err;
372} 449}
373 450
374static int 451static int
@@ -656,6 +733,115 @@ static void ext4_xattr_update_super_block(handle_t *handle,
656 } 733 }
657} 734}
658 735
736static inline size_t round_up_cluster(struct inode *inode, size_t length)
737{
738 struct super_block *sb = inode->i_sb;
739 size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits +
740 inode->i_blkbits);
741 size_t mask = ~(cluster_size - 1);
742
743 return (length + cluster_size - 1) & mask;
744}
745
746static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
747{
748 int err;
749
750 err = dquot_alloc_inode(inode);
751 if (err)
752 return err;
753 err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len));
754 if (err)
755 dquot_free_inode(inode);
756 return err;
757}
758
759static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
760{
761 dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
762 dquot_free_inode(inode);
763}
764
765static int __ext4_xattr_set_credits(struct inode *inode,
766 struct buffer_head *block_bh,
767 size_t value_len)
768{
769 struct super_block *sb = inode->i_sb;
770 int credits;
771 int blocks;
772
773 /*
774 * 1) Owner inode update
775 * 2) Ref count update on old xattr block
776 * 3) new xattr block
777 * 4) block bitmap update for new xattr block
778 * 5) group descriptor for new xattr block
779 * 6) block bitmap update for old xattr block
780 * 7) group descriptor for old block
781 *
782 * 6 & 7 can happen if we have two racing threads T_a and T_b
783 * which are each trying to set an xattr on inodes I_a and I_b
784 * which were both initially sharing an xattr block.
785 */
786 credits = 7;
787
788 /* Quota updates. */
789 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb);
790
791 /*
792 * In case of inline data, we may push out the data to a block,
793 * so we need to reserve credits for this eventuality
794 */
795 if (ext4_has_inline_data(inode))
796 credits += ext4_writepage_trans_blocks(inode) + 1;
797
798 /* We are done if ea_inode feature is not enabled. */
799 if (!ext4_has_feature_ea_inode(sb))
800 return credits;
801
802 /* New ea_inode, inode map, block bitmap, group descriptor. */
803 credits += 4;
804
805 /* Data blocks. */
806 blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
807
808 /* Indirection block or one level of extent tree. */
809 blocks += 1;
810
811 /* Block bitmap and group descriptor updates for each block. */
812 credits += blocks * 2;
813
814 /* Blocks themselves. */
815 credits += blocks;
816
817 /* Dereference ea_inode holding old xattr value.
818 * Old ea_inode, inode map, block bitmap, group descriptor.
819 */
820 credits += 4;
821
822 /* Data blocks for old ea_inode. */
823 blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
824
825 /* Indirection block or one level of extent tree for old ea_inode. */
826 blocks += 1;
827
828 /* Block bitmap and group descriptor updates for each block. */
829 credits += blocks * 2;
830
831 /* We may need to clone the existing xattr block in which case we need
832 * to increment ref counts for existing ea_inodes referenced by it.
833 */
834 if (block_bh) {
835 struct ext4_xattr_entry *entry = BFIRST(block_bh);
836
837 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry))
838 if (entry->e_value_inum)
839 /* Ref count update on ea_inode. */
840 credits += 1;
841 }
842 return credits;
843}
844
659static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode, 845static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
660 int credits, struct buffer_head *bh, 846 int credits, struct buffer_head *bh,
661 bool dirty, bool block_csum) 847 bool dirty, bool block_csum)
@@ -705,12 +891,140 @@ static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
705 return 0; 891 return 0;
706} 892}
707 893
894static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
895 int ref_change)
896{
897 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
898 struct ext4_iloc iloc;
899 s64 ref_count;
900 u32 hash;
901 int ret;
902
903 inode_lock(ea_inode);
904
905 ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
906 if (ret) {
907 iloc.bh = NULL;
908 goto out;
909 }
910
911 ref_count = ext4_xattr_inode_get_ref(ea_inode);
912 ref_count += ref_change;
913 ext4_xattr_inode_set_ref(ea_inode, ref_count);
914
915 if (ref_change > 0) {
916 WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
917 ea_inode->i_ino, ref_count);
918
919 if (ref_count == 1) {
920 WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
921 ea_inode->i_ino, ea_inode->i_nlink);
922
923 set_nlink(ea_inode, 1);
924 ext4_orphan_del(handle, ea_inode);
925
926 hash = ext4_xattr_inode_get_hash(ea_inode);
927 mb_cache_entry_create(ea_inode_cache, GFP_NOFS, hash,
928 ea_inode->i_ino,
929 true /* reusable */);
930 }
931 } else {
932 WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
933 ea_inode->i_ino, ref_count);
934
935 if (ref_count == 0) {
936 WARN_ONCE(ea_inode->i_nlink != 1,
937 "EA inode %lu i_nlink=%u",
938 ea_inode->i_ino, ea_inode->i_nlink);
939
940 clear_nlink(ea_inode);
941 ext4_orphan_add(handle, ea_inode);
942
943 hash = ext4_xattr_inode_get_hash(ea_inode);
944 mb_cache_entry_delete(ea_inode_cache, hash,
945 ea_inode->i_ino);
946 }
947 }
948
949 ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
950 iloc.bh = NULL;
951 if (ret)
952 ext4_warning_inode(ea_inode,
953 "ext4_mark_iloc_dirty() failed ret=%d", ret);
954out:
955 brelse(iloc.bh);
956 inode_unlock(ea_inode);
957 return ret;
958}
959
960static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode)
961{
962 return ext4_xattr_inode_update_ref(handle, ea_inode, 1);
963}
964
965static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode)
966{
967 return ext4_xattr_inode_update_ref(handle, ea_inode, -1);
968}
969
970static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
971 struct ext4_xattr_entry *first)
972{
973 struct inode *ea_inode;
974 struct ext4_xattr_entry *entry;
975 struct ext4_xattr_entry *failed_entry;
976 unsigned int ea_ino;
977 int err, saved_err;
978
979 for (entry = first; !IS_LAST_ENTRY(entry);
980 entry = EXT4_XATTR_NEXT(entry)) {
981 if (!entry->e_value_inum)
982 continue;
983 ea_ino = le32_to_cpu(entry->e_value_inum);
984 err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
985 if (err)
986 goto cleanup;
987 err = ext4_xattr_inode_inc_ref(handle, ea_inode);
988 if (err) {
989 ext4_warning_inode(ea_inode, "inc ref error %d", err);
990 iput(ea_inode);
991 goto cleanup;
992 }
993 iput(ea_inode);
994 }
995 return 0;
996
997cleanup:
998 saved_err = err;
999 failed_entry = entry;
1000
1001 for (entry = first; entry != failed_entry;
1002 entry = EXT4_XATTR_NEXT(entry)) {
1003 if (!entry->e_value_inum)
1004 continue;
1005 ea_ino = le32_to_cpu(entry->e_value_inum);
1006 err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
1007 if (err) {
1008 ext4_warning(parent->i_sb,
1009 "cleanup ea_ino %u iget error %d", ea_ino,
1010 err);
1011 continue;
1012 }
1013 err = ext4_xattr_inode_dec_ref(handle, ea_inode);
1014 if (err)
1015 ext4_warning_inode(ea_inode, "cleanup dec ref error %d",
1016 err);
1017 iput(ea_inode);
1018 }
1019 return saved_err;
1020}
1021
708static void 1022static void
709ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent, 1023ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
710 struct buffer_head *bh, 1024 struct buffer_head *bh,
711 struct ext4_xattr_entry *first, bool block_csum, 1025 struct ext4_xattr_entry *first, bool block_csum,
712 struct ext4_xattr_inode_array **ea_inode_array, 1026 struct ext4_xattr_inode_array **ea_inode_array,
713 int extra_credits) 1027 int extra_credits, bool skip_quota)
714{ 1028{
715 struct inode *ea_inode; 1029 struct inode *ea_inode;
716 struct ext4_xattr_entry *entry; 1030 struct ext4_xattr_entry *entry;
@@ -747,10 +1061,16 @@ ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent,
747 continue; 1061 continue;
748 } 1062 }
749 1063
750 inode_lock(ea_inode); 1064 err = ext4_xattr_inode_dec_ref(handle, ea_inode);
751 clear_nlink(ea_inode); 1065 if (err) {
752 ext4_orphan_add(handle, ea_inode); 1066 ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d",
753 inode_unlock(ea_inode); 1067 err);
1068 continue;
1069 }
1070
1071 if (!skip_quota)
1072 ext4_xattr_inode_free_quota(parent,
1073 le32_to_cpu(entry->e_value_size));
754 1074
755 /* 1075 /*
756 * Forget about ea_inode within the same transaction that 1076 * Forget about ea_inode within the same transaction that
@@ -784,7 +1104,9 @@ ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent,
784 */ 1104 */
785static void 1105static void
786ext4_xattr_release_block(handle_t *handle, struct inode *inode, 1106ext4_xattr_release_block(handle_t *handle, struct inode *inode,
787 struct buffer_head *bh) 1107 struct buffer_head *bh,
1108 struct ext4_xattr_inode_array **ea_inode_array,
1109 int extra_credits)
788{ 1110{
789 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 1111 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
790 u32 hash, ref; 1112 u32 hash, ref;
@@ -807,6 +1129,14 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
807 mb_cache_entry_delete(ea_block_cache, hash, bh->b_blocknr); 1129 mb_cache_entry_delete(ea_block_cache, hash, bh->b_blocknr);
808 get_bh(bh); 1130 get_bh(bh);
809 unlock_buffer(bh); 1131 unlock_buffer(bh);
1132
1133 if (ext4_has_feature_ea_inode(inode->i_sb))
1134 ext4_xattr_inode_dec_ref_all(handle, inode, bh,
1135 BFIRST(bh),
1136 true /* block_csum */,
1137 ea_inode_array,
1138 extra_credits,
1139 true /* skip_quota */);
810 ext4_free_blocks(handle, inode, bh, 0, 1, 1140 ext4_free_blocks(handle, inode, bh, 0, 1,
811 EXT4_FREE_BLOCKS_METADATA | 1141 EXT4_FREE_BLOCKS_METADATA |
812 EXT4_FREE_BLOCKS_FORGET); 1142 EXT4_FREE_BLOCKS_FORGET);
@@ -878,8 +1208,8 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
878{ 1208{
879 struct buffer_head *bh = NULL; 1209 struct buffer_head *bh = NULL;
880 unsigned long block = 0; 1210 unsigned long block = 0;
881 unsigned blocksize = ea_inode->i_sb->s_blocksize; 1211 int blocksize = ea_inode->i_sb->s_blocksize;
882 unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits; 1212 int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
883 int csize, wsize = 0; 1213 int csize, wsize = 0;
884 int ret = 0; 1214 int ret = 0;
885 int retries = 0; 1215 int retries = 0;
@@ -945,7 +1275,7 @@ out:
945 * Create an inode to store the value of a large EA. 1275 * Create an inode to store the value of a large EA.
946 */ 1276 */
947static struct inode *ext4_xattr_inode_create(handle_t *handle, 1277static struct inode *ext4_xattr_inode_create(handle_t *handle,
948 struct inode *inode) 1278 struct inode *inode, u32 hash)
949{ 1279{
950 struct inode *ea_inode = NULL; 1280 struct inode *ea_inode = NULL;
951 uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) }; 1281 uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
@@ -963,67 +1293,115 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
963 ea_inode->i_fop = &ext4_file_operations; 1293 ea_inode->i_fop = &ext4_file_operations;
964 ext4_set_aops(ea_inode); 1294 ext4_set_aops(ea_inode);
965 ext4_xattr_inode_set_class(ea_inode); 1295 ext4_xattr_inode_set_class(ea_inode);
966 ea_inode->i_generation = inode->i_generation;
967 EXT4_I(ea_inode)->i_flags |= EXT4_EA_INODE_FL;
968
969 /*
970 * A back-pointer from EA inode to parent inode will be useful
971 * for e2fsck.
972 */
973 EXT4_XATTR_INODE_SET_PARENT(ea_inode, inode->i_ino);
974 unlock_new_inode(ea_inode); 1296 unlock_new_inode(ea_inode);
975 err = ext4_inode_attach_jinode(ea_inode); 1297 ext4_xattr_inode_set_ref(ea_inode, 1);
1298 ext4_xattr_inode_set_hash(ea_inode, hash);
1299 err = ext4_mark_inode_dirty(handle, ea_inode);
1300 if (!err)
1301 err = ext4_inode_attach_jinode(ea_inode);
976 if (err) { 1302 if (err) {
977 iput(ea_inode); 1303 iput(ea_inode);
978 return ERR_PTR(err); 1304 return ERR_PTR(err);
979 } 1305 }
1306
1307 /*
1308 * Xattr inodes are shared therefore quota charging is performed
1309 * at a higher level.
1310 */
1311 dquot_free_inode(ea_inode);
1312 dquot_drop(ea_inode);
1313 inode_lock(ea_inode);
1314 ea_inode->i_flags |= S_NOQUOTA;
1315 inode_unlock(ea_inode);
980 } 1316 }
981 1317
982 return ea_inode; 1318 return ea_inode;
983} 1319}
984 1320
985/* 1321static struct inode *
986 * Unlink the inode storing the value of the EA. 1322ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
987 */ 1323 size_t value_len, u32 hash)
988int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino)
989{ 1324{
990 struct inode *ea_inode = NULL; 1325 struct inode *ea_inode;
991 int err; 1326 struct mb_cache_entry *ce;
1327 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
1328 void *ea_data;
992 1329
993 err = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); 1330 ce = mb_cache_entry_find_first(ea_inode_cache, hash);
994 if (err) 1331 if (!ce)
995 return err; 1332 return NULL;
996 1333
997 clear_nlink(ea_inode); 1334 ea_data = ext4_kvmalloc(value_len, GFP_NOFS);
998 iput(ea_inode); 1335 if (!ea_data) {
1336 mb_cache_entry_put(ea_inode_cache, ce);
1337 return NULL;
1338 }
999 1339
1000 return 0; 1340 while (ce) {
1341 ea_inode = ext4_iget(inode->i_sb, ce->e_value);
1342 if (!IS_ERR(ea_inode) &&
1343 !is_bad_inode(ea_inode) &&
1344 (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
1345 i_size_read(ea_inode) == value_len &&
1346 !ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
1347 !ext4_xattr_inode_verify_hash(ea_inode, ea_data,
1348 value_len) &&
1349 !memcmp(value, ea_data, value_len)) {
1350 mb_cache_entry_touch(ea_inode_cache, ce);
1351 mb_cache_entry_put(ea_inode_cache, ce);
1352 kvfree(ea_data);
1353 return ea_inode;
1354 }
1355
1356 if (!IS_ERR(ea_inode))
1357 iput(ea_inode);
1358 ce = mb_cache_entry_find_next(ea_inode_cache, ce);
1359 }
1360 kvfree(ea_data);
1361 return NULL;
1001} 1362}
1002 1363
1003/* 1364/*
1004 * Add value of the EA in an inode. 1365 * Add value of the EA in an inode.
1005 */ 1366 */
1006static int ext4_xattr_inode_set(handle_t *handle, struct inode *inode, 1367static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
1007 unsigned long *ea_ino, const void *value, 1368 const void *value, size_t value_len,
1008 size_t value_len) 1369 struct inode **ret_inode)
1009{ 1370{
1010 struct inode *ea_inode; 1371 struct inode *ea_inode;
1372 u32 hash;
1011 int err; 1373 int err;
1012 1374
1375 hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
1376 ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
1377 if (ea_inode) {
1378 err = ext4_xattr_inode_inc_ref(handle, ea_inode);
1379 if (err) {
1380 iput(ea_inode);
1381 return err;
1382 }
1383
1384 *ret_inode = ea_inode;
1385 return 0;
1386 }
1387
1013 /* Create an inode for the EA value */ 1388 /* Create an inode for the EA value */
1014 ea_inode = ext4_xattr_inode_create(handle, inode); 1389 ea_inode = ext4_xattr_inode_create(handle, inode, hash);
1015 if (IS_ERR(ea_inode)) 1390 if (IS_ERR(ea_inode))
1016 return PTR_ERR(ea_inode); 1391 return PTR_ERR(ea_inode);
1017 1392
1018 err = ext4_xattr_inode_write(handle, ea_inode, value, value_len); 1393 err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
1019 if (err) 1394 if (err) {
1020 clear_nlink(ea_inode); 1395 ext4_xattr_inode_dec_ref(handle, ea_inode);
1021 else 1396 iput(ea_inode);
1022 *ea_ino = ea_inode->i_ino; 1397 return err;
1398 }
1023 1399
1024 iput(ea_inode); 1400 mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
1401 ea_inode->i_ino, true /* reusable */);
1025 1402
1026 return err; 1403 *ret_inode = ea_inode;
1404 return 0;
1027} 1405}
1028 1406
1029static int ext4_xattr_set_entry(struct ext4_xattr_info *i, 1407static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
@@ -1031,9 +1409,37 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
1031 handle_t *handle, struct inode *inode) 1409 handle_t *handle, struct inode *inode)
1032{ 1410{
1033 struct ext4_xattr_entry *last; 1411 struct ext4_xattr_entry *last;
1034 size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); 1412 struct ext4_xattr_entry *here = s->here;
1413 size_t min_offs = s->end - s->base, name_len = strlen(i->name);
1035 int in_inode = i->in_inode; 1414 int in_inode = i->in_inode;
1036 int rc; 1415 struct inode *old_ea_inode = NULL;
1416 struct inode *new_ea_inode = NULL;
1417 size_t old_size, new_size;
1418 int ret;
1419
1420 /* Space used by old and new values. */
1421 old_size = (!s->not_found && !here->e_value_inum) ?
1422 EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0;
1423 new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0;
1424
1425 /*
1426 * Optimization for the simple case when old and new values have the
1427 * same padded sizes. Not applicable if external inodes are involved.
1428 */
1429 if (new_size && new_size == old_size) {
1430 size_t offs = le16_to_cpu(here->e_value_offs);
1431 void *val = s->base + offs;
1432
1433 here->e_value_size = cpu_to_le32(i->value_len);
1434 if (i->value == EXT4_ZERO_XATTR_VALUE) {
1435 memset(val, 0, new_size);
1436 } else {
1437 memcpy(val, i->value, i->value_len);
1438 /* Clear padding bytes. */
1439 memset(val + i->value_len, 0, new_size - i->value_len);
1440 }
1441 return 0;
1442 }
1037 1443
1038 /* Compute min_offs and last. */ 1444 /* Compute min_offs and last. */
1039 last = s->first; 1445 last = s->first;
@@ -1044,122 +1450,148 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
1044 min_offs = offs; 1450 min_offs = offs;
1045 } 1451 }
1046 } 1452 }
1047 free = min_offs - ((void *)last - s->base) - sizeof(__u32); 1453
1048 if (!s->not_found) { 1454 /* Check whether we have enough space. */
1049 if (!in_inode &&
1050 !s->here->e_value_inum && s->here->e_value_size) {
1051 size_t size = le32_to_cpu(s->here->e_value_size);
1052 free += EXT4_XATTR_SIZE(size);
1053 }
1054 free += EXT4_XATTR_LEN(name_len);
1055 }
1056 if (i->value) { 1455 if (i->value) {
1057 size_t value_len = EXT4_XATTR_SIZE(i->value_len); 1456 size_t free;
1058 1457
1059 if (in_inode) 1458 free = min_offs - ((void *)last - s->base) - sizeof(__u32);
1060 value_len = 0; 1459 if (!s->not_found)
1460 free += EXT4_XATTR_LEN(name_len) + old_size;
1061 1461
1062 if (free < EXT4_XATTR_LEN(name_len) + value_len) 1462 if (free < EXT4_XATTR_LEN(name_len) + new_size) {
1063 return -ENOSPC; 1463 ret = -ENOSPC;
1464 goto out;
1465 }
1064 } 1466 }
1065 1467
1066 if (i->value && s->not_found) { 1468 /*
1067 /* Insert the new name. */ 1469 * Getting access to old and new ea inodes is subject to failures.
1068 size_t size = EXT4_XATTR_LEN(name_len); 1470 * Finish that work before doing any modifications to the xattr data.
1069 size_t rest = (void *)last - (void *)s->here + sizeof(__u32); 1471 */
1070 memmove((void *)s->here + size, s->here, rest); 1472 if (!s->not_found && here->e_value_inum) {
1071 memset(s->here, 0, size); 1473 ret = ext4_xattr_inode_iget(inode,
1072 s->here->e_name_index = i->name_index; 1474 le32_to_cpu(here->e_value_inum),
1073 s->here->e_name_len = name_len; 1475 &old_ea_inode);
1074 memcpy(s->here->e_name, i->name, name_len); 1476 if (ret) {
1075 } else { 1477 old_ea_inode = NULL;
1076 if (!s->here->e_value_inum && s->here->e_value_size && 1478 goto out;
1077 s->here->e_value_offs > 0) { 1479 }
1078 void *first_val = s->base + min_offs; 1480 }
1079 size_t offs = le16_to_cpu(s->here->e_value_offs); 1481 if (i->value && in_inode) {
1080 void *val = s->base + offs; 1482 WARN_ON_ONCE(!i->value_len);
1081 size_t size = EXT4_XATTR_SIZE(
1082 le32_to_cpu(s->here->e_value_size));
1083
1084 if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
1085 /* The old and the new value have the same
1086 size. Just replace. */
1087 s->here->e_value_size =
1088 cpu_to_le32(i->value_len);
1089 if (i->value == EXT4_ZERO_XATTR_VALUE) {
1090 memset(val, 0, size);
1091 } else {
1092 /* Clear pad bytes first. */
1093 memset(val + size - EXT4_XATTR_PAD, 0,
1094 EXT4_XATTR_PAD);
1095 memcpy(val, i->value, i->value_len);
1096 }
1097 return 0;
1098 }
1099 1483
1100 /* Remove the old value. */ 1484 ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
1101 memmove(first_val + size, first_val, val - first_val); 1485 if (ret)
1102 memset(first_val, 0, size); 1486 goto out;
1103 s->here->e_value_size = 0; 1487
1104 s->here->e_value_offs = 0; 1488 ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
1105 min_offs += size; 1489 i->value_len,
1106 1490 &new_ea_inode);
1107 /* Adjust all value offsets. */ 1491 if (ret) {
1108 last = s->first; 1492 new_ea_inode = NULL;
1109 while (!IS_LAST_ENTRY(last)) { 1493 ext4_xattr_inode_free_quota(inode, i->value_len);
1110 size_t o = le16_to_cpu(last->e_value_offs); 1494 goto out;
1111 if (!last->e_value_inum &&
1112 last->e_value_size && o < offs)
1113 last->e_value_offs =
1114 cpu_to_le16(o + size);
1115 last = EXT4_XATTR_NEXT(last);
1116 }
1117 } 1495 }
1118 if (s->here->e_value_inum) { 1496 }
1119 ext4_xattr_inode_unlink(inode, 1497
1120 le32_to_cpu(s->here->e_value_inum)); 1498 if (old_ea_inode) {
1121 s->here->e_value_inum = 0; 1499 /* We are ready to release ref count on the old_ea_inode. */
1500 ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
1501 if (ret) {
1502 /* Release newly required ref count on new_ea_inode. */
1503 if (new_ea_inode) {
1504 int err;
1505
1506 err = ext4_xattr_inode_dec_ref(handle,
1507 new_ea_inode);
1508 if (err)
1509 ext4_warning_inode(new_ea_inode,
1510 "dec ref new_ea_inode err=%d",
1511 err);
1512 ext4_xattr_inode_free_quota(inode,
1513 i->value_len);
1514 }
1515 goto out;
1122 } 1516 }
1123 if (!i->value) { 1517
1124 /* Remove the old name. */ 1518 ext4_xattr_inode_free_quota(inode,
1125 size_t size = EXT4_XATTR_LEN(name_len); 1519 le32_to_cpu(here->e_value_size));
1126 last = ENTRY((void *)last - size); 1520 }
1127 memmove(s->here, (void *)s->here + size, 1521
1128 (void *)last - (void *)s->here + sizeof(__u32)); 1522 /* No failures allowed past this point. */
1129 memset(last, 0, size); 1523
1524 if (!s->not_found && here->e_value_offs) {
1525 /* Remove the old value. */
1526 void *first_val = s->base + min_offs;
1527 size_t offs = le16_to_cpu(here->e_value_offs);
1528 void *val = s->base + offs;
1529
1530 memmove(first_val + old_size, first_val, val - first_val);
1531 memset(first_val, 0, old_size);
1532 min_offs += old_size;
1533
1534 /* Adjust all value offsets. */
1535 last = s->first;
1536 while (!IS_LAST_ENTRY(last)) {
1537 size_t o = le16_to_cpu(last->e_value_offs);
1538
1539 if (!last->e_value_inum &&
1540 last->e_value_size && o < offs)
1541 last->e_value_offs = cpu_to_le16(o + old_size);
1542 last = EXT4_XATTR_NEXT(last);
1130 } 1543 }
1131 } 1544 }
1132 1545
1546 if (!i->value) {
1547 /* Remove old name. */
1548 size_t size = EXT4_XATTR_LEN(name_len);
1549
1550 last = ENTRY((void *)last - size);
1551 memmove(here, (void *)here + size,
1552 (void *)last - (void *)here + sizeof(__u32));
1553 memset(last, 0, size);
1554 } else if (s->not_found) {
1555 /* Insert new name. */
1556 size_t size = EXT4_XATTR_LEN(name_len);
1557 size_t rest = (void *)last - (void *)here + sizeof(__u32);
1558
1559 memmove((void *)here + size, here, rest);
1560 memset(here, 0, size);
1561 here->e_name_index = i->name_index;
1562 here->e_name_len = name_len;
1563 memcpy(here->e_name, i->name, name_len);
1564 } else {
1565 /* This is an update, reset value info. */
1566 here->e_value_inum = 0;
1567 here->e_value_offs = 0;
1568 here->e_value_size = 0;
1569 }
1570
1133 if (i->value) { 1571 if (i->value) {
1134 /* Insert the new value. */ 1572 /* Insert new value. */
1135 if (in_inode) { 1573 if (in_inode) {
1136 unsigned long ea_ino = 1574 here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino);
1137 le32_to_cpu(s->here->e_value_inum);
1138 rc = ext4_xattr_inode_set(handle, inode, &ea_ino,
1139 i->value, i->value_len);
1140 if (rc)
1141 goto out;
1142 s->here->e_value_inum = cpu_to_le32(ea_ino);
1143 s->here->e_value_offs = 0;
1144 } else if (i->value_len) { 1575 } else if (i->value_len) {
1145 size_t size = EXT4_XATTR_SIZE(i->value_len); 1576 void *val = s->base + min_offs - new_size;
1146 void *val = s->base + min_offs - size; 1577
1147 s->here->e_value_offs = cpu_to_le16(min_offs - size); 1578 here->e_value_offs = cpu_to_le16(min_offs - new_size);
1148 s->here->e_value_inum = 0;
1149 if (i->value == EXT4_ZERO_XATTR_VALUE) { 1579 if (i->value == EXT4_ZERO_XATTR_VALUE) {
1150 memset(val, 0, size); 1580 memset(val, 0, new_size);
1151 } else { 1581 } else {
1152 /* Clear the pad bytes first. */
1153 memset(val + size - EXT4_XATTR_PAD, 0,
1154 EXT4_XATTR_PAD);
1155 memcpy(val, i->value, i->value_len); 1582 memcpy(val, i->value, i->value_len);
1583 /* Clear padding bytes. */
1584 memset(val + i->value_len, 0,
1585 new_size - i->value_len);
1156 } 1586 }
1157 } 1587 }
1158 s->here->e_value_size = cpu_to_le32(i->value_len); 1588 here->e_value_size = cpu_to_le32(i->value_len);
1159 } 1589 }
1160 1590 ret = 0;
1161out: 1591out:
1162 return rc; 1592 iput(old_ea_inode);
1593 iput(new_ea_inode);
1594 return ret;
1163} 1595}
1164 1596
1165struct ext4_xattr_block_find { 1597struct ext4_xattr_block_find {
@@ -1221,6 +1653,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
1221 struct mb_cache_entry *ce = NULL; 1653 struct mb_cache_entry *ce = NULL;
1222 int error = 0; 1654 int error = 0;
1223 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 1655 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
1656 struct inode *ea_inode = NULL;
1657 size_t old_ea_inode_size = 0;
1224 1658
1225#define header(x) ((struct ext4_xattr_header *)(x)) 1659#define header(x) ((struct ext4_xattr_header *)(x))
1226 1660
@@ -1275,6 +1709,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
1275 header(s->base)->h_refcount = cpu_to_le32(1); 1709 header(s->base)->h_refcount = cpu_to_le32(1);
1276 s->here = ENTRY(s->base + offset); 1710 s->here = ENTRY(s->base + offset);
1277 s->end = s->base + bs->bh->b_size; 1711 s->end = s->base + bs->bh->b_size;
1712
1713 /*
1714 * If existing entry points to an xattr inode, we need
1715 * to prevent ext4_xattr_set_entry() from decrementing
1716 * ref count on it because the reference belongs to the
1717 * original block. In this case, make the entry look
1718 * like it has an empty value.
1719 */
1720 if (!s->not_found && s->here->e_value_inum) {
1721 /*
1722 * Defer quota free call for previous inode
1723 * until success is guaranteed.
1724 */
1725 old_ea_inode_size = le32_to_cpu(
1726 s->here->e_value_size);
1727 s->here->e_value_inum = 0;
1728 s->here->e_value_size = 0;
1729 }
1278 } 1730 }
1279 } else { 1731 } else {
1280 /* Allocate a buffer where we construct the new block. */ 1732 /* Allocate a buffer where we construct the new block. */
@@ -1296,6 +1748,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
1296 goto bad_block; 1748 goto bad_block;
1297 if (error) 1749 if (error)
1298 goto cleanup; 1750 goto cleanup;
1751
1752 if (i->value && s->here->e_value_inum) {
1753 unsigned int ea_ino;
1754
1755 /*
1756 * A ref count on ea_inode has been taken as part of the call to
1757 * ext4_xattr_set_entry() above. We would like to drop this
1758 * extra ref but we have to wait until the xattr block is
1759 * initialized and has its own ref count on the ea_inode.
1760 */
1761 ea_ino = le32_to_cpu(s->here->e_value_inum);
1762 error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
1763 if (error) {
1764 ea_inode = NULL;
1765 goto cleanup;
1766 }
1767 }
1768
1299 if (!IS_LAST_ENTRY(s->first)) 1769 if (!IS_LAST_ENTRY(s->first))
1300 ext4_xattr_rehash(header(s->base), s->here); 1770 ext4_xattr_rehash(header(s->base), s->here);
1301 1771
@@ -1406,6 +1876,22 @@ getblk_failed:
1406 EXT4_FREE_BLOCKS_METADATA); 1876 EXT4_FREE_BLOCKS_METADATA);
1407 goto cleanup; 1877 goto cleanup;
1408 } 1878 }
1879 error = ext4_xattr_inode_inc_ref_all(handle, inode,
1880 ENTRY(header(s->base)+1));
1881 if (error)
1882 goto getblk_failed;
1883 if (ea_inode) {
1884 /* Drop the extra ref on ea_inode. */
1885 error = ext4_xattr_inode_dec_ref(handle,
1886 ea_inode);
1887 if (error)
1888 ext4_warning_inode(ea_inode,
1889 "dec ref error=%d",
1890 error);
1891 iput(ea_inode);
1892 ea_inode = NULL;
1893 }
1894
1409 lock_buffer(new_bh); 1895 lock_buffer(new_bh);
1410 error = ext4_journal_get_create_access(handle, new_bh); 1896 error = ext4_journal_get_create_access(handle, new_bh);
1411 if (error) { 1897 if (error) {
@@ -1425,15 +1911,38 @@ getblk_failed:
1425 } 1911 }
1426 } 1912 }
1427 1913
1914 if (old_ea_inode_size)
1915 ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
1916
1428 /* Update the inode. */ 1917 /* Update the inode. */
1429 EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; 1918 EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
1430 1919
1431 /* Drop the previous xattr block. */ 1920 /* Drop the previous xattr block. */
1432 if (bs->bh && bs->bh != new_bh) 1921 if (bs->bh && bs->bh != new_bh) {
1433 ext4_xattr_release_block(handle, inode, bs->bh); 1922 struct ext4_xattr_inode_array *ea_inode_array = NULL;
1923
1924 ext4_xattr_release_block(handle, inode, bs->bh,
1925 &ea_inode_array,
1926 0 /* extra_credits */);
1927 ext4_xattr_inode_array_free(ea_inode_array);
1928 }
1434 error = 0; 1929 error = 0;
1435 1930
1436cleanup: 1931cleanup:
1932 if (ea_inode) {
1933 int error2;
1934
1935 error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
1936 if (error2)
1937 ext4_warning_inode(ea_inode, "dec ref error=%d",
1938 error2);
1939
1940 /* If there was an error, revert the quota charge. */
1941 if (error)
1942 ext4_xattr_inode_free_quota(inode,
1943 i_size_read(ea_inode));
1944 iput(ea_inode);
1945 }
1437 if (ce) 1946 if (ce)
1438 mb_cache_entry_put(ea_block_cache, ce); 1947 mb_cache_entry_put(ea_block_cache, ce);
1439 brelse(new_bh); 1948 brelse(new_bh);
@@ -1558,6 +2067,22 @@ static int ext4_xattr_value_same(struct ext4_xattr_search *s,
1558 return !memcmp(value, i->value, i->value_len); 2067 return !memcmp(value, i->value, i->value_len);
1559} 2068}
1560 2069
2070static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
2071{
2072 struct buffer_head *bh;
2073 int error;
2074
2075 if (!EXT4_I(inode)->i_file_acl)
2076 return NULL;
2077 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
2078 if (!bh)
2079 return ERR_PTR(-EIO);
2080 error = ext4_xattr_check_block(inode, bh);
2081 if (error)
2082 return ERR_PTR(error);
2083 return bh;
2084}
2085
1561/* 2086/*
1562 * ext4_xattr_set_handle() 2087 * ext4_xattr_set_handle()
1563 * 2088 *
@@ -1600,9 +2125,18 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1600 2125
1601 /* Check journal credits under write lock. */ 2126 /* Check journal credits under write lock. */
1602 if (ext4_handle_valid(handle)) { 2127 if (ext4_handle_valid(handle)) {
2128 struct buffer_head *bh;
1603 int credits; 2129 int credits;
1604 2130
1605 credits = ext4_xattr_set_credits(inode, value_len); 2131 bh = ext4_xattr_get_block(inode);
2132 if (IS_ERR(bh)) {
2133 error = PTR_ERR(bh);
2134 goto cleanup;
2135 }
2136
2137 credits = __ext4_xattr_set_credits(inode, bh, value_len);
2138 brelse(bh);
2139
1606 if (!ext4_handle_has_enough_credits(handle, credits)) { 2140 if (!ext4_handle_has_enough_credits(handle, credits)) {
1607 error = -ENOSPC; 2141 error = -ENOSPC;
1608 goto cleanup; 2142 goto cleanup;
@@ -1638,6 +2172,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1638 if (flags & XATTR_CREATE) 2172 if (flags & XATTR_CREATE)
1639 goto cleanup; 2173 goto cleanup;
1640 } 2174 }
2175
1641 if (!value) { 2176 if (!value) {
1642 if (!is.s.not_found) 2177 if (!is.s.not_found)
1643 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 2178 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
@@ -1706,34 +2241,29 @@ cleanup:
1706 return error; 2241 return error;
1707} 2242}
1708 2243
1709int ext4_xattr_set_credits(struct inode *inode, size_t value_len) 2244int ext4_xattr_set_credits(struct inode *inode, size_t value_len, int *credits)
1710{ 2245{
1711 struct super_block *sb = inode->i_sb; 2246 struct buffer_head *bh;
1712 int credits; 2247 int err;
1713
1714 if (!EXT4_SB(sb)->s_journal)
1715 return 0;
1716 2248
1717 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); 2249 *credits = 0;
1718 2250
1719 /* 2251 if (!EXT4_SB(inode->i_sb)->s_journal)
1720 * In case of inline data, we may push out the data to a block, 2252 return 0;
1721 * so we need to reserve credits for this eventuality
1722 */
1723 if (ext4_has_inline_data(inode))
1724 credits += ext4_writepage_trans_blocks(inode) + 1;
1725
1726 if (ext4_has_feature_ea_inode(sb)) {
1727 int nrblocks = (value_len + sb->s_blocksize - 1) >>
1728 sb->s_blocksize_bits;
1729 2253
1730 /* For new inode */ 2254 down_read(&EXT4_I(inode)->xattr_sem);
1731 credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3;
1732 2255
1733 /* For data blocks of EA inode */ 2256 bh = ext4_xattr_get_block(inode);
1734 credits += ext4_meta_trans_blocks(inode, nrblocks, 0); 2257 if (IS_ERR(bh)) {
2258 err = PTR_ERR(bh);
2259 } else {
2260 *credits = __ext4_xattr_set_credits(inode, bh, value_len);
2261 brelse(bh);
2262 err = 0;
1735 } 2263 }
1736 return credits; 2264
2265 up_read(&EXT4_I(inode)->xattr_sem);
2266 return err;
1737} 2267}
1738 2268
1739/* 2269/*
@@ -1758,7 +2288,10 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1758 return error; 2288 return error;
1759 2289
1760retry: 2290retry:
1761 credits = ext4_xattr_set_credits(inode, value_len); 2291 error = ext4_xattr_set_credits(inode, value_len, &credits);
2292 if (error)
2293 return error;
2294
1762 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 2295 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1763 if (IS_ERR(handle)) { 2296 if (IS_ERR(handle)) {
1764 error = PTR_ERR(handle); 2297 error = PTR_ERR(handle);
@@ -2064,10 +2597,10 @@ cleanup:
2064 return error; 2597 return error;
2065} 2598}
2066 2599
2067
2068#define EIA_INCR 16 /* must be 2^n */ 2600#define EIA_INCR 16 /* must be 2^n */
2069#define EIA_MASK (EIA_INCR - 1) 2601#define EIA_MASK (EIA_INCR - 1)
2070/* Add the large xattr @inode into @ea_inode_array for later deletion. 2602
2603/* Add the large xattr @inode into @ea_inode_array for deferred iput().
2071 * If @ea_inode_array is new or full it will be grown and the old 2604 * If @ea_inode_array is new or full it will be grown and the old
2072 * contents copied over. 2605 * contents copied over.
2073 */ 2606 */
@@ -2112,21 +2645,19 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
2112 * ext4_xattr_delete_inode() 2645 * ext4_xattr_delete_inode()
2113 * 2646 *
2114 * Free extended attribute resources associated with this inode. Traverse 2647 * Free extended attribute resources associated with this inode. Traverse
2115 * all entries and unlink any xattr inodes associated with this inode. This 2648 * all entries and decrement reference on any xattr inodes associated with this
2116 * is called immediately before an inode is freed. We have exclusive 2649 * inode. This is called immediately before an inode is freed. We have exclusive
2117 * access to the inode. If an orphan inode is deleted it will also delete any 2650 * access to the inode. If an orphan inode is deleted it will also release its
2118 * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget() 2651 * references on xattr block and xattr inodes.
2119 * to ensure they belong to the parent inode and were not deleted already.
2120 */ 2652 */
2121int 2653int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
2122ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 2654 struct ext4_xattr_inode_array **ea_inode_array,
2123 struct ext4_xattr_inode_array **ea_inode_array, 2655 int extra_credits)
2124 int extra_credits)
2125{ 2656{
2126 struct buffer_head *bh = NULL; 2657 struct buffer_head *bh = NULL;
2127 struct ext4_xattr_ibody_header *header; 2658 struct ext4_xattr_ibody_header *header;
2128 struct ext4_inode *raw_inode;
2129 struct ext4_iloc iloc = { .bh = NULL }; 2659 struct ext4_iloc iloc = { .bh = NULL };
2660 struct ext4_xattr_entry *entry;
2130 int error; 2661 int error;
2131 2662
2132 error = ext4_xattr_ensure_credits(handle, inode, extra_credits, 2663 error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
@@ -2138,66 +2669,71 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
2138 goto cleanup; 2669 goto cleanup;
2139 } 2670 }
2140 2671
2141 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 2672 if (ext4_has_feature_ea_inode(inode->i_sb) &&
2142 goto delete_external_ea; 2673 ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
2143 2674
2144 error = ext4_get_inode_loc(inode, &iloc); 2675 error = ext4_get_inode_loc(inode, &iloc);
2145 if (error) 2676 if (error) {
2146 goto cleanup; 2677 EXT4_ERROR_INODE(inode, "inode loc (error %d)", error);
2147 2678 goto cleanup;
2148 error = ext4_journal_get_write_access(handle, iloc.bh); 2679 }
2149 if (error)
2150 goto cleanup;
2151 2680
2152 raw_inode = ext4_raw_inode(&iloc); 2681 error = ext4_journal_get_write_access(handle, iloc.bh);
2153 header = IHDR(inode, raw_inode); 2682 if (error) {
2154 ext4_xattr_inode_remove_all(handle, inode, iloc.bh, IFIRST(header), 2683 EXT4_ERROR_INODE(inode, "write access (error %d)",
2155 false /* block_csum */, ea_inode_array, 2684 error);
2156 extra_credits); 2685 goto cleanup;
2686 }
2157 2687
2158delete_external_ea: 2688 header = IHDR(inode, ext4_raw_inode(&iloc));
2159 if (!EXT4_I(inode)->i_file_acl) { 2689 if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC))
2160 error = 0; 2690 ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh,
2161 goto cleanup; 2691 IFIRST(header),
2162 } 2692 false /* block_csum */,
2163 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 2693 ea_inode_array,
2164 if (!bh) { 2694 extra_credits,
2165 EXT4_ERROR_INODE(inode, "block %llu read error", 2695 false /* skip_quota */);
2166 EXT4_I(inode)->i_file_acl);
2167 error = -EIO;
2168 goto cleanup;
2169 }
2170 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
2171 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
2172 EXT4_ERROR_INODE(inode, "bad block %llu",
2173 EXT4_I(inode)->i_file_acl);
2174 error = -EFSCORRUPTED;
2175 goto cleanup;
2176 } 2696 }
2177 2697
2178 if (ext4_has_feature_ea_inode(inode->i_sb)) { 2698 if (EXT4_I(inode)->i_file_acl) {
2179 error = ext4_journal_get_write_access(handle, bh); 2699 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
2180 if (error) { 2700 if (!bh) {
2181 EXT4_ERROR_INODE(inode, "write access %llu", 2701 EXT4_ERROR_INODE(inode, "block %llu read error",
2182 EXT4_I(inode)->i_file_acl); 2702 EXT4_I(inode)->i_file_acl);
2703 error = -EIO;
2704 goto cleanup;
2705 }
2706 error = ext4_xattr_check_block(inode, bh);
2707 if (error) {
2708 EXT4_ERROR_INODE(inode, "bad block %llu (error %d)",
2709 EXT4_I(inode)->i_file_acl, error);
2183 goto cleanup; 2710 goto cleanup;
2184 } 2711 }
2185 ext4_xattr_inode_remove_all(handle, inode, bh,
2186 BFIRST(bh),
2187 true /* block_csum */,
2188 ea_inode_array,
2189 extra_credits);
2190 }
2191 2712
2192 ext4_xattr_release_block(handle, inode, bh); 2713 if (ext4_has_feature_ea_inode(inode->i_sb)) {
2193 /* Update i_file_acl within the same transaction that releases block. */ 2714 for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
2194 EXT4_I(inode)->i_file_acl = 0; 2715 entry = EXT4_XATTR_NEXT(entry))
2195 error = ext4_mark_inode_dirty(handle, inode); 2716 if (entry->e_value_inum)
2196 if (error) { 2717 ext4_xattr_inode_free_quota(inode,
2197 EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)", 2718 le32_to_cpu(entry->e_value_size));
2198 error); 2719
2199 goto cleanup; 2720 }
2721
2722 ext4_xattr_release_block(handle, inode, bh, ea_inode_array,
2723 extra_credits);
2724 /*
2725 * Update i_file_acl value in the same transaction that releases
2726 * block.
2727 */
2728 EXT4_I(inode)->i_file_acl = 0;
2729 error = ext4_mark_inode_dirty(handle, inode);
2730 if (error) {
2731 EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
2732 error);
2733 goto cleanup;
2734 }
2200 } 2735 }
2736 error = 0;
2201cleanup: 2737cleanup:
2202 brelse(iloc.bh); 2738 brelse(iloc.bh);
2203 brelse(bh); 2739 brelse(bh);
@@ -2206,17 +2742,13 @@ cleanup:
2206 2742
2207void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array) 2743void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
2208{ 2744{
2209 struct inode *ea_inode; 2745 int idx;
2210 int idx = 0;
2211 2746
2212 if (ea_inode_array == NULL) 2747 if (ea_inode_array == NULL)
2213 return; 2748 return;
2214 2749
2215 for (; idx < ea_inode_array->count; ++idx) { 2750 for (idx = 0; idx < ea_inode_array->count; ++idx)
2216 ea_inode = ea_inode_array->inodes[idx]; 2751 iput(ea_inode_array->inodes[idx]);
2217 clear_nlink(ea_inode);
2218 iput(ea_inode);
2219 }
2220 kfree(ea_inode_array); 2752 kfree(ea_inode_array);
2221} 2753}
2222 2754
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index b2005a2716d9..67616cb9a059 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -70,19 +70,6 @@ struct ext4_xattr_entry {
70#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 70#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
71 71
72/* 72/*
73 * Link EA inode back to parent one using i_mtime field.
74 * Extra integer type conversion added to ignore higher
75 * bits in i_mtime.tv_sec which might be set by ext4_get()
76 */
77#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \
78do { \
79 (inode)->i_mtime.tv_sec = inum; \
80} while(0)
81
82#define EXT4_XATTR_INODE_GET_PARENT(inode) \
83((__u32)(inode)->i_mtime.tv_sec)
84
85/*
86 * The minimum size of EA value when you start storing it in an external inode 73 * The minimum size of EA value when you start storing it in an external inode
87 * size of block - size of header - size of 1 entry - 4 null bytes 74 * size of block - size of header - size of 1 entry - 4 null bytes
88*/ 75*/
@@ -165,9 +152,9 @@ extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
165extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 152extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
166extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 153extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
167extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 154extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
168extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len); 155extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
156 int *credits);
169 157
170extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
171extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 158extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
172 struct ext4_xattr_inode_array **array, 159 struct ext4_xattr_inode_array **array,
173 int extra_credits); 160 int extra_credits);