diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-24 15:55:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-24 15:55:26 -0400 |
commit | 0e01df100b6bf22a1de61b66657502a6454153c5 (patch) | |
tree | aae8f9787efc3014696b3e5ae854c1cf9e472bdd | |
parent | a56f489502e28caac56c8a0735549740f0ae0711 (diff) | |
parent | 12735f881952c32b31bc4e433768f18489f79ec9 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"Fix a number of bugs, most notably a potential stale data exposure
after a crash and a potential BUG_ON crash if a file has the data
journalling flag enabled while it has dirty delayed allocation blocks
that haven't been written yet. Also fix a potential crash in the new
project quota code and a maliciously corrupted file system.
In addition, fix some DAX-specific bugs, including when there is a
transient ENOSPC situation and races between writes via direct I/O and
an mmap'ed segment that could lead to lost I/O.
Finally the usual set of miscellaneous cleanups"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (23 commits)
ext4: pre-zero allocated blocks for DAX IO
ext4: refactor direct IO code
ext4: fix race in transient ENOSPC detection
ext4: handle transient ENOSPC properly for DAX
dax: call get_blocks() with create == 1 for write faults to unwritten extents
ext4: remove unmeetable inconsisteny check from ext4_find_extent()
jbd2: remove excess descriptions for handle_s
ext4: remove unnecessary bio get/put
ext4: silence UBSAN in ext4_mb_init()
ext4: address UBSAN warning in mb_find_order_for_block()
ext4: fix oops on corrupted filesystem
ext4: fix check of dqget() return value in ext4_ioctl_setproject()
ext4: clean up error handling when orphan list is corrupted
ext4: fix hang when processing corrupted orphaned inode list
ext4: remove trailing \n from ext4_warning/ext4_error calls
ext4: fix races between changing inode journal mode and ext4_writepages
ext4: handle unwritten or delalloc buffers before enabling data journaling
ext4: fix jbd2 handle extension in ext4_ext_truncate_extend_restart()
ext4: do not ask jbd2 to write data for delalloc buffers
jbd2: add support for avoiding data writes during transaction commits
...
-rw-r--r-- | fs/compat.c | 4 | ||||
-rw-r--r-- | fs/dax.c | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 3 | ||||
-rw-r--r-- | fs/ext4/dir.c | 5 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 20 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 15 | ||||
-rw-r--r-- | fs/ext4/extents.c | 20 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 2 | ||||
-rw-r--r-- | fs/ext4/file.c | 6 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 59 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 127 | ||||
-rw-r--r-- | fs/ext4/inline.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 323 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 12 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 4 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 2 | ||||
-rw-r--r-- | fs/ext4/namei.c | 9 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 2 | ||||
-rw-r--r-- | fs/ext4/resize.c | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 4 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 4 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 3 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 22 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 2 | ||||
-rw-r--r-- | fs/readdir.c | 4 | ||||
-rw-r--r-- | include/linux/jbd2.h | 16 | ||||
-rw-r--r-- | kernel/locking/percpu-rwsem.c | 1 |
28 files changed, 364 insertions, 313 deletions
diff --git a/fs/compat.c b/fs/compat.c index 8754e9aa14ad..be6e48b0a46c 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -936,6 +936,8 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, | |||
936 | } | 936 | } |
937 | dirent = buf->previous; | 937 | dirent = buf->previous; |
938 | if (dirent) { | 938 | if (dirent) { |
939 | if (signal_pending(current)) | ||
940 | return -EINTR; | ||
939 | if (__put_user(offset, &dirent->d_off)) | 941 | if (__put_user(offset, &dirent->d_off)) |
940 | goto efault; | 942 | goto efault; |
941 | } | 943 | } |
@@ -1020,6 +1022,8 @@ static int compat_filldir64(struct dir_context *ctx, const char *name, | |||
1020 | dirent = buf->previous; | 1022 | dirent = buf->previous; |
1021 | 1023 | ||
1022 | if (dirent) { | 1024 | if (dirent) { |
1025 | if (signal_pending(current)) | ||
1026 | return -EINTR; | ||
1023 | if (__put_user_unaligned(offset, &dirent->d_off)) | 1027 | if (__put_user_unaligned(offset, &dirent->d_off)) |
1024 | goto efault; | 1028 | goto efault; |
1025 | } | 1029 | } |
diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c | |||
@@ -676,7 +676,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
676 | if (error) | 676 | if (error) |
677 | goto unlock_page; | 677 | goto unlock_page; |
678 | 678 | ||
679 | if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) { | 679 | if (!buffer_mapped(&bh) && !vmf->cow_page) { |
680 | if (vmf->flags & FAULT_FLAG_WRITE) { | 680 | if (vmf->flags & FAULT_FLAG_WRITE) { |
681 | error = get_block(inode, block, &bh, 1); | 681 | error = get_block(inode, block, &bh, 1); |
682 | count_vm_event(PGMAJFAULT); | 682 | count_vm_event(PGMAJFAULT); |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index fe1f50fe764f..3020fd70c392 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -610,7 +610,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
610 | 610 | ||
611 | jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); | 611 | jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); |
612 | 612 | ||
613 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); | 613 | jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); |
614 | return 1; | ||
614 | } | 615 | } |
615 | 616 | ||
616 | /* | 617 | /* |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 5d00bf060254..68323e3da3fa 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -150,6 +150,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
150 | while (ctx->pos < inode->i_size) { | 150 | while (ctx->pos < inode->i_size) { |
151 | struct ext4_map_blocks map; | 151 | struct ext4_map_blocks map; |
152 | 152 | ||
153 | if (fatal_signal_pending(current)) { | ||
154 | err = -ERESTARTSYS; | ||
155 | goto errout; | ||
156 | } | ||
157 | cond_resched(); | ||
153 | map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); | 158 | map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); |
154 | map.m_len = 1; | 159 | map.m_len = 1; |
155 | err = ext4_map_blocks(NULL, inode, &map, 0); | 160 | err = ext4_map_blocks(NULL, inode, &map, 0); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 72f4c9e00e97..b84aa1ca480a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/ratelimit.h> | 33 | #include <linux/ratelimit.h> |
34 | #include <crypto/hash.h> | 34 | #include <crypto/hash.h> |
35 | #include <linux/falloc.h> | 35 | #include <linux/falloc.h> |
36 | #include <linux/percpu-rwsem.h> | ||
36 | #ifdef __KERNEL__ | 37 | #ifdef __KERNEL__ |
37 | #include <linux/compat.h> | 38 | #include <linux/compat.h> |
38 | #endif | 39 | #endif |
@@ -581,6 +582,9 @@ enum { | |||
581 | #define EXT4_GET_BLOCKS_ZERO 0x0200 | 582 | #define EXT4_GET_BLOCKS_ZERO 0x0200 |
582 | #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ | 583 | #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ |
583 | EXT4_GET_BLOCKS_ZERO) | 584 | EXT4_GET_BLOCKS_ZERO) |
585 | /* Caller will submit data before dropping transaction handle. This | ||
586 | * allows jbd2 to avoid submitting data before commit. */ | ||
587 | #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 | ||
584 | 588 | ||
585 | /* | 589 | /* |
586 | * The bit position of these flags must not overlap with any of the | 590 | * The bit position of these flags must not overlap with any of the |
@@ -1505,6 +1509,9 @@ struct ext4_sb_info { | |||
1505 | struct ratelimit_state s_err_ratelimit_state; | 1509 | struct ratelimit_state s_err_ratelimit_state; |
1506 | struct ratelimit_state s_warning_ratelimit_state; | 1510 | struct ratelimit_state s_warning_ratelimit_state; |
1507 | struct ratelimit_state s_msg_ratelimit_state; | 1511 | struct ratelimit_state s_msg_ratelimit_state; |
1512 | |||
1513 | /* Barrier between changing inodes' journal flags and writepages ops. */ | ||
1514 | struct percpu_rw_semaphore s_journal_flag_rwsem; | ||
1508 | }; | 1515 | }; |
1509 | 1516 | ||
1510 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1517 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1549,7 +1556,6 @@ enum { | |||
1549 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | 1556 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read |
1550 | nolocking */ | 1557 | nolocking */ |
1551 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ | 1558 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ |
1552 | EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ | ||
1553 | EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ | 1559 | EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ |
1554 | }; | 1560 | }; |
1555 | 1561 | ||
@@ -2521,8 +2527,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); | |||
2521 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); | 2527 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); |
2522 | int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, | 2528 | int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, |
2523 | struct buffer_head *bh_result, int create); | 2529 | struct buffer_head *bh_result, int create); |
2524 | int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, | 2530 | int ext4_dax_get_block(struct inode *inode, sector_t iblock, |
2525 | struct buffer_head *bh_result, int create); | 2531 | struct buffer_head *bh_result, int create); |
2526 | int ext4_get_block(struct inode *inode, sector_t iblock, | 2532 | int ext4_get_block(struct inode *inode, sector_t iblock, |
2527 | struct buffer_head *bh_result, int create); | 2533 | struct buffer_head *bh_result, int create); |
2528 | int ext4_dio_get_block(struct inode *inode, sector_t iblock, | 2534 | int ext4_dio_get_block(struct inode *inode, sector_t iblock, |
@@ -2581,7 +2587,6 @@ extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, | |||
2581 | /* indirect.c */ | 2587 | /* indirect.c */ |
2582 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 2588 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
2583 | struct ext4_map_blocks *map, int flags); | 2589 | struct ext4_map_blocks *map, int flags); |
2584 | extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter); | ||
2585 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | 2590 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); |
2586 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); | 2591 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); |
2587 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); | 2592 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); |
@@ -3329,6 +3334,13 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) | |||
3329 | } | 3334 | } |
3330 | } | 3335 | } |
3331 | 3336 | ||
3337 | static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len) | ||
3338 | { | ||
3339 | int blksize = 1 << inode->i_blkbits; | ||
3340 | |||
3341 | return IS_ALIGNED(off, blksize) && IS_ALIGNED(len, blksize); | ||
3342 | } | ||
3343 | |||
3332 | #endif /* __KERNEL__ */ | 3344 | #endif /* __KERNEL__ */ |
3333 | 3345 | ||
3334 | #define EFSBADCRC EBADMSG /* Bad CRC detected */ | 3346 | #define EFSBADCRC EBADMSG /* Bad CRC detected */ |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5f5846211095..09c1ef38cbe6 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -359,10 +359,21 @@ static inline int ext4_journal_force_commit(journal_t *journal) | |||
359 | return 0; | 359 | return 0; |
360 | } | 360 | } |
361 | 361 | ||
362 | static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) | 362 | static inline int ext4_jbd2_inode_add_write(handle_t *handle, |
363 | struct inode *inode) | ||
363 | { | 364 | { |
364 | if (ext4_handle_valid(handle)) | 365 | if (ext4_handle_valid(handle)) |
365 | return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode); | 366 | return jbd2_journal_inode_add_write(handle, |
367 | EXT4_I(inode)->jinode); | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | static inline int ext4_jbd2_inode_add_wait(handle_t *handle, | ||
372 | struct inode *inode) | ||
373 | { | ||
374 | if (ext4_handle_valid(handle)) | ||
375 | return jbd2_journal_inode_add_wait(handle, | ||
376 | EXT4_I(inode)->jinode); | ||
366 | return 0; | 377 | return 0; |
367 | } | 378 | } |
368 | 379 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 95bf4679ac54..2a2eef9c14e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -120,9 +120,14 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, | |||
120 | 120 | ||
121 | if (!ext4_handle_valid(handle)) | 121 | if (!ext4_handle_valid(handle)) |
122 | return 0; | 122 | return 0; |
123 | if (handle->h_buffer_credits > needed) | 123 | if (handle->h_buffer_credits >= needed) |
124 | return 0; | 124 | return 0; |
125 | err = ext4_journal_extend(handle, needed); | 125 | /* |
126 | * If we need to extend the journal get a few extra blocks | ||
127 | * while we're at it for efficiency's sake. | ||
128 | */ | ||
129 | needed += 3; | ||
130 | err = ext4_journal_extend(handle, needed - handle->h_buffer_credits); | ||
126 | if (err <= 0) | 131 | if (err <= 0) |
127 | return err; | 132 | return err; |
128 | err = ext4_truncate_restart_trans(handle, inode, needed); | 133 | err = ext4_truncate_restart_trans(handle, inode, needed); |
@@ -907,13 +912,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, | |||
907 | 912 | ||
908 | eh = ext_block_hdr(bh); | 913 | eh = ext_block_hdr(bh); |
909 | ppos++; | 914 | ppos++; |
910 | if (unlikely(ppos > depth)) { | ||
911 | put_bh(bh); | ||
912 | EXT4_ERROR_INODE(inode, | ||
913 | "ppos %d > depth %d", ppos, depth); | ||
914 | ret = -EFSCORRUPTED; | ||
915 | goto err; | ||
916 | } | ||
917 | path[ppos].p_bh = bh; | 915 | path[ppos].p_bh = bh; |
918 | path[ppos].p_hdr = eh; | 916 | path[ppos].p_hdr = eh; |
919 | } | 917 | } |
@@ -2583,7 +2581,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2583 | } | 2581 | } |
2584 | } else | 2582 | } else |
2585 | ext4_error(sbi->s_sb, "strange request: removal(2) " | 2583 | ext4_error(sbi->s_sb, "strange request: removal(2) " |
2586 | "%u-%u from %u:%u\n", | 2584 | "%u-%u from %u:%u", |
2587 | from, to, le32_to_cpu(ex->ee_block), ee_len); | 2585 | from, to, le32_to_cpu(ex->ee_block), ee_len); |
2588 | return 0; | 2586 | return 0; |
2589 | } | 2587 | } |
@@ -3738,7 +3736,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3738 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | 3736 | if (ee_block != map->m_lblk || ee_len > map->m_len) { |
3739 | #ifdef EXT4_DEBUG | 3737 | #ifdef EXT4_DEBUG |
3740 | ext4_warning("Inode (%ld) finished: extent logical block %llu," | 3738 | ext4_warning("Inode (%ld) finished: extent logical block %llu," |
3741 | " len %u; IO logical block %llu, len %u\n", | 3739 | " len %u; IO logical block %llu, len %u", |
3742 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3740 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
3743 | (unsigned long long)map->m_lblk, map->m_len); | 3741 | (unsigned long long)map->m_lblk, map->m_len); |
3744 | #endif | 3742 | #endif |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e38b987ac7f5..37e059202cd2 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -707,7 +707,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
707 | (status & EXTENT_STATUS_WRITTEN)) { | 707 | (status & EXTENT_STATUS_WRITTEN)) { |
708 | ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " | 708 | ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " |
709 | " delayed and written which can potentially " | 709 | " delayed and written which can potentially " |
710 | " cause data loss.\n", lblk, len); | 710 | " cause data loss.", lblk, len); |
711 | WARN_ON(1); | 711 | WARN_ON(1); |
712 | } | 712 | } |
713 | 713 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 00ff6912adb3..d478110c32a6 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -202,7 +202,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
202 | if (IS_ERR(handle)) | 202 | if (IS_ERR(handle)) |
203 | result = VM_FAULT_SIGBUS; | 203 | result = VM_FAULT_SIGBUS; |
204 | else | 204 | else |
205 | result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL); | 205 | result = __dax_fault(vma, vmf, ext4_dax_get_block, NULL); |
206 | 206 | ||
207 | if (write) { | 207 | if (write) { |
208 | if (!IS_ERR(handle)) | 208 | if (!IS_ERR(handle)) |
@@ -238,7 +238,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
238 | result = VM_FAULT_SIGBUS; | 238 | result = VM_FAULT_SIGBUS; |
239 | else | 239 | else |
240 | result = __dax_pmd_fault(vma, addr, pmd, flags, | 240 | result = __dax_pmd_fault(vma, addr, pmd, flags, |
241 | ext4_dax_mmap_get_block, NULL); | 241 | ext4_dax_get_block, NULL); |
242 | 242 | ||
243 | if (write) { | 243 | if (write) { |
244 | if (!IS_ERR(handle)) | 244 | if (!IS_ERR(handle)) |
@@ -373,7 +373,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
373 | if (ext4_encrypted_inode(d_inode(dir)) && | 373 | if (ext4_encrypted_inode(d_inode(dir)) && |
374 | !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) { | 374 | !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) { |
375 | ext4_warning(inode->i_sb, | 375 | ext4_warning(inode->i_sb, |
376 | "Inconsistent encryption contexts: %lu/%lu\n", | 376 | "Inconsistent encryption contexts: %lu/%lu", |
377 | (unsigned long) d_inode(dir)->i_ino, | 377 | (unsigned long) d_inode(dir)->i_ino, |
378 | (unsigned long) inode->i_ino); | 378 | (unsigned long) inode->i_ino); |
379 | dput(dir); | 379 | dput(dir); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 237b877d316d..3da4cf8d18b6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -1150,25 +1150,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
1150 | unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); | 1150 | unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); |
1151 | ext4_group_t block_group; | 1151 | ext4_group_t block_group; |
1152 | int bit; | 1152 | int bit; |
1153 | struct buffer_head *bitmap_bh; | 1153 | struct buffer_head *bitmap_bh = NULL; |
1154 | struct inode *inode = NULL; | 1154 | struct inode *inode = NULL; |
1155 | long err = -EIO; | 1155 | int err = -EFSCORRUPTED; |
1156 | 1156 | ||
1157 | /* Error cases - e2fsck has already cleaned up for us */ | 1157 | if (ino < EXT4_FIRST_INO(sb) || ino > max_ino) |
1158 | if (ino > max_ino) { | 1158 | goto bad_orphan; |
1159 | ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); | ||
1160 | err = -EFSCORRUPTED; | ||
1161 | goto error; | ||
1162 | } | ||
1163 | 1159 | ||
1164 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 1160 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
1165 | bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); | 1161 | bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); |
1166 | bitmap_bh = ext4_read_inode_bitmap(sb, block_group); | 1162 | bitmap_bh = ext4_read_inode_bitmap(sb, block_group); |
1167 | if (IS_ERR(bitmap_bh)) { | 1163 | if (IS_ERR(bitmap_bh)) { |
1168 | err = PTR_ERR(bitmap_bh); | 1164 | ext4_error(sb, "inode bitmap error %ld for orphan %lu", |
1169 | ext4_warning(sb, "inode bitmap error %ld for orphan %lu", | 1165 | ino, PTR_ERR(bitmap_bh)); |
1170 | ino, err); | 1166 | return (struct inode *) bitmap_bh; |
1171 | goto error; | ||
1172 | } | 1167 | } |
1173 | 1168 | ||
1174 | /* Having the inode bit set should be a 100% indicator that this | 1169 | /* Having the inode bit set should be a 100% indicator that this |
@@ -1179,15 +1174,21 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
1179 | goto bad_orphan; | 1174 | goto bad_orphan; |
1180 | 1175 | ||
1181 | inode = ext4_iget(sb, ino); | 1176 | inode = ext4_iget(sb, ino); |
1182 | if (IS_ERR(inode)) | 1177 | if (IS_ERR(inode)) { |
1183 | goto iget_failed; | 1178 | err = PTR_ERR(inode); |
1179 | ext4_error(sb, "couldn't read orphan inode %lu (err %d)", | ||
1180 | ino, err); | ||
1181 | return inode; | ||
1182 | } | ||
1184 | 1183 | ||
1185 | /* | 1184 | /* |
1186 | * If the orphans has i_nlinks > 0 then it should be able to be | 1185 | * If the orphans has i_nlinks > 0 then it should be able to |
1187 | * truncated, otherwise it won't be removed from the orphan list | 1186 | * be truncated, otherwise it won't be removed from the orphan |
1188 | * during processing and an infinite loop will result. | 1187 | * list during processing and an infinite loop will result. |
1188 | * Similarly, it must not be a bad inode. | ||
1189 | */ | 1189 | */ |
1190 | if (inode->i_nlink && !ext4_can_truncate(inode)) | 1190 | if ((inode->i_nlink && !ext4_can_truncate(inode)) || |
1191 | is_bad_inode(inode)) | ||
1191 | goto bad_orphan; | 1192 | goto bad_orphan; |
1192 | 1193 | ||
1193 | if (NEXT_ORPHAN(inode) > max_ino) | 1194 | if (NEXT_ORPHAN(inode) > max_ino) |
@@ -1195,29 +1196,25 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
1195 | brelse(bitmap_bh); | 1196 | brelse(bitmap_bh); |
1196 | return inode; | 1197 | return inode; |
1197 | 1198 | ||
1198 | iget_failed: | ||
1199 | err = PTR_ERR(inode); | ||
1200 | inode = NULL; | ||
1201 | bad_orphan: | 1199 | bad_orphan: |
1202 | ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); | 1200 | ext4_error(sb, "bad orphan inode %lu", ino); |
1203 | printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n", | 1201 | if (bitmap_bh) |
1204 | bit, (unsigned long long)bitmap_bh->b_blocknr, | 1202 | printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n", |
1205 | ext4_test_bit(bit, bitmap_bh->b_data)); | 1203 | bit, (unsigned long long)bitmap_bh->b_blocknr, |
1206 | printk(KERN_WARNING "inode=%p\n", inode); | 1204 | ext4_test_bit(bit, bitmap_bh->b_data)); |
1207 | if (inode) { | 1205 | if (inode) { |
1208 | printk(KERN_WARNING "is_bad_inode(inode)=%d\n", | 1206 | printk(KERN_ERR "is_bad_inode(inode)=%d\n", |
1209 | is_bad_inode(inode)); | 1207 | is_bad_inode(inode)); |
1210 | printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n", | 1208 | printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n", |
1211 | NEXT_ORPHAN(inode)); | 1209 | NEXT_ORPHAN(inode)); |
1212 | printk(KERN_WARNING "max_ino=%lu\n", max_ino); | 1210 | printk(KERN_ERR "max_ino=%lu\n", max_ino); |
1213 | printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink); | 1211 | printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink); |
1214 | /* Avoid freeing blocks if we got a bad deleted inode */ | 1212 | /* Avoid freeing blocks if we got a bad deleted inode */ |
1215 | if (inode->i_nlink == 0) | 1213 | if (inode->i_nlink == 0) |
1216 | inode->i_blocks = 0; | 1214 | inode->i_blocks = 0; |
1217 | iput(inode); | 1215 | iput(inode); |
1218 | } | 1216 | } |
1219 | brelse(bitmap_bh); | 1217 | brelse(bitmap_bh); |
1220 | error: | ||
1221 | return ERR_PTR(err); | 1218 | return ERR_PTR(err); |
1222 | } | 1219 | } |
1223 | 1220 | ||
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 627b7e8f9ef3..bc15c2c17633 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -649,133 +649,6 @@ out: | |||
649 | } | 649 | } |
650 | 650 | ||
651 | /* | 651 | /* |
652 | * O_DIRECT for ext3 (or indirect map) based files | ||
653 | * | ||
654 | * If the O_DIRECT write will extend the file then add this inode to the | ||
655 | * orphan list. So recovery will truncate it back to the original size | ||
656 | * if the machine crashes during the write. | ||
657 | * | ||
658 | * If the O_DIRECT write is intantiating holes inside i_size and the machine | ||
659 | * crashes then stale disk data _may_ be exposed inside the file. But current | ||
660 | * VFS code falls back into buffered path in that case so we are safe. | ||
661 | */ | ||
662 | ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | ||
663 | { | ||
664 | struct file *file = iocb->ki_filp; | ||
665 | struct inode *inode = file->f_mapping->host; | ||
666 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
667 | loff_t offset = iocb->ki_pos; | ||
668 | handle_t *handle; | ||
669 | ssize_t ret; | ||
670 | int orphan = 0; | ||
671 | size_t count = iov_iter_count(iter); | ||
672 | int retries = 0; | ||
673 | |||
674 | if (iov_iter_rw(iter) == WRITE) { | ||
675 | loff_t final_size = offset + count; | ||
676 | |||
677 | if (final_size > inode->i_size) { | ||
678 | /* Credits for sb + inode write */ | ||
679 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
680 | if (IS_ERR(handle)) { | ||
681 | ret = PTR_ERR(handle); | ||
682 | goto out; | ||
683 | } | ||
684 | ret = ext4_orphan_add(handle, inode); | ||
685 | if (ret) { | ||
686 | ext4_journal_stop(handle); | ||
687 | goto out; | ||
688 | } | ||
689 | orphan = 1; | ||
690 | ei->i_disksize = inode->i_size; | ||
691 | ext4_journal_stop(handle); | ||
692 | } | ||
693 | } | ||
694 | |||
695 | retry: | ||
696 | if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) { | ||
697 | /* | ||
698 | * Nolock dioread optimization may be dynamically disabled | ||
699 | * via ext4_inode_block_unlocked_dio(). Check inode's state | ||
700 | * while holding extra i_dio_count ref. | ||
701 | */ | ||
702 | inode_dio_begin(inode); | ||
703 | smp_mb(); | ||
704 | if (unlikely(ext4_test_inode_state(inode, | ||
705 | EXT4_STATE_DIOREAD_LOCK))) { | ||
706 | inode_dio_end(inode); | ||
707 | goto locked; | ||
708 | } | ||
709 | if (IS_DAX(inode)) | ||
710 | ret = dax_do_io(iocb, inode, iter, | ||
711 | ext4_dio_get_block, NULL, 0); | ||
712 | else | ||
713 | ret = __blockdev_direct_IO(iocb, inode, | ||
714 | inode->i_sb->s_bdev, iter, | ||
715 | ext4_dio_get_block, | ||
716 | NULL, NULL, 0); | ||
717 | inode_dio_end(inode); | ||
718 | } else { | ||
719 | locked: | ||
720 | if (IS_DAX(inode)) | ||
721 | ret = dax_do_io(iocb, inode, iter, | ||
722 | ext4_dio_get_block, NULL, DIO_LOCKING); | ||
723 | else | ||
724 | ret = blockdev_direct_IO(iocb, inode, iter, | ||
725 | ext4_dio_get_block); | ||
726 | |||
727 | if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { | ||
728 | loff_t isize = i_size_read(inode); | ||
729 | loff_t end = offset + count; | ||
730 | |||
731 | if (end > isize) | ||
732 | ext4_truncate_failed_write(inode); | ||
733 | } | ||
734 | } | ||
735 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
736 | goto retry; | ||
737 | |||
738 | if (orphan) { | ||
739 | int err; | ||
740 | |||
741 | /* Credits for sb + inode write */ | ||
742 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
743 | if (IS_ERR(handle)) { | ||
744 | /* This is really bad luck. We've written the data | ||
745 | * but cannot extend i_size. Bail out and pretend | ||
746 | * the write failed... */ | ||
747 | ret = PTR_ERR(handle); | ||
748 | if (inode->i_nlink) | ||
749 | ext4_orphan_del(NULL, inode); | ||
750 | |||
751 | goto out; | ||
752 | } | ||
753 | if (inode->i_nlink) | ||
754 | ext4_orphan_del(handle, inode); | ||
755 | if (ret > 0) { | ||
756 | loff_t end = offset + ret; | ||
757 | if (end > inode->i_size) { | ||
758 | ei->i_disksize = end; | ||
759 | i_size_write(inode, end); | ||
760 | /* | ||
761 | * We're going to return a positive `ret' | ||
762 | * here due to non-zero-length I/O, so there's | ||
763 | * no way of reporting error returns from | ||
764 | * ext4_mark_inode_dirty() to userspace. So | ||
765 | * ignore it. | ||
766 | */ | ||
767 | ext4_mark_inode_dirty(handle, inode); | ||
768 | } | ||
769 | } | ||
770 | err = ext4_journal_stop(handle); | ||
771 | if (ret == 0) | ||
772 | ret = err; | ||
773 | } | ||
774 | out: | ||
775 | return ret; | ||
776 | } | ||
777 | |||
778 | /* | ||
779 | * Calculate the number of metadata blocks need to reserve | 652 | * Calculate the number of metadata blocks need to reserve |
780 | * to allocate a new block at @lblocks for non extent file based file | 653 | * to allocate a new block at @lblocks for non extent file based file |
781 | */ | 654 | */ |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 7bc6c855cc18..ff7538c26992 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -1780,7 +1780,7 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) | |||
1780 | ext4_warning(dir->i_sb, | 1780 | ext4_warning(dir->i_sb, |
1781 | "bad inline directory (dir #%lu) - " | 1781 | "bad inline directory (dir #%lu) - " |
1782 | "inode %u, rec_len %u, name_len %d" | 1782 | "inode %u, rec_len %u, name_len %d" |
1783 | "inline size %d\n", | 1783 | "inline size %d", |
1784 | dir->i_ino, le32_to_cpu(de->inode), | 1784 | dir->i_ino, le32_to_cpu(de->inode), |
1785 | le16_to_cpu(de->rec_len), de->name_len, | 1785 | le16_to_cpu(de->rec_len), de->name_len, |
1786 | inline_size); | 1786 | inline_size); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 79b298d397b4..f7140ca66e3b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -684,6 +684,24 @@ out_sem: | |||
684 | ret = check_block_validity(inode, map); | 684 | ret = check_block_validity(inode, map); |
685 | if (ret != 0) | 685 | if (ret != 0) |
686 | return ret; | 686 | return ret; |
687 | |||
688 | /* | ||
689 | * Inodes with freshly allocated blocks where contents will be | ||
690 | * visible after transaction commit must be on transaction's | ||
691 | * ordered data list. | ||
692 | */ | ||
693 | if (map->m_flags & EXT4_MAP_NEW && | ||
694 | !(map->m_flags & EXT4_MAP_UNWRITTEN) && | ||
695 | !(flags & EXT4_GET_BLOCKS_ZERO) && | ||
696 | !IS_NOQUOTA(inode) && | ||
697 | ext4_should_order_data(inode)) { | ||
698 | if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) | ||
699 | ret = ext4_jbd2_inode_add_wait(handle, inode); | ||
700 | else | ||
701 | ret = ext4_jbd2_inode_add_write(handle, inode); | ||
702 | if (ret) | ||
703 | return ret; | ||
704 | } | ||
687 | } | 705 | } |
688 | return retval; | 706 | return retval; |
689 | } | 707 | } |
@@ -1289,15 +1307,6 @@ static int ext4_write_end(struct file *file, | |||
1289 | int i_size_changed = 0; | 1307 | int i_size_changed = 0; |
1290 | 1308 | ||
1291 | trace_ext4_write_end(inode, pos, len, copied); | 1309 | trace_ext4_write_end(inode, pos, len, copied); |
1292 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { | ||
1293 | ret = ext4_jbd2_file_inode(handle, inode); | ||
1294 | if (ret) { | ||
1295 | unlock_page(page); | ||
1296 | put_page(page); | ||
1297 | goto errout; | ||
1298 | } | ||
1299 | } | ||
1300 | |||
1301 | if (ext4_has_inline_data(inode)) { | 1310 | if (ext4_has_inline_data(inode)) { |
1302 | ret = ext4_write_inline_data_end(inode, pos, len, | 1311 | ret = ext4_write_inline_data_end(inode, pos, len, |
1303 | copied, page); | 1312 | copied, page); |
@@ -2313,7 +2322,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | |||
2313 | * the data was copied into the page cache. | 2322 | * the data was copied into the page cache. |
2314 | */ | 2323 | */ |
2315 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | 2324 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
2316 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | 2325 | EXT4_GET_BLOCKS_METADATA_NOFAIL | |
2326 | EXT4_GET_BLOCKS_IO_SUBMIT; | ||
2317 | dioread_nolock = ext4_should_dioread_nolock(inode); | 2327 | dioread_nolock = ext4_should_dioread_nolock(inode); |
2318 | if (dioread_nolock) | 2328 | if (dioread_nolock) |
2319 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | 2329 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; |
@@ -2602,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping, | |||
2602 | struct blk_plug plug; | 2612 | struct blk_plug plug; |
2603 | bool give_up_on_write = false; | 2613 | bool give_up_on_write = false; |
2604 | 2614 | ||
2615 | percpu_down_read(&sbi->s_journal_flag_rwsem); | ||
2605 | trace_ext4_writepages(inode, wbc); | 2616 | trace_ext4_writepages(inode, wbc); |
2606 | 2617 | ||
2607 | if (dax_mapping(mapping)) | 2618 | if (dax_mapping(mapping)) { |
2608 | return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, | 2619 | ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, |
2609 | wbc); | 2620 | wbc); |
2621 | goto out_writepages; | ||
2622 | } | ||
2610 | 2623 | ||
2611 | /* | 2624 | /* |
2612 | * No pages to write? This is mainly a kludge to avoid starting | 2625 | * No pages to write? This is mainly a kludge to avoid starting |
@@ -2776,6 +2789,7 @@ retry: | |||
2776 | out_writepages: | 2789 | out_writepages: |
2777 | trace_ext4_writepages_result(inode, wbc, ret, | 2790 | trace_ext4_writepages_result(inode, wbc, ret, |
2778 | nr_to_write - wbc->nr_to_write); | 2791 | nr_to_write - wbc->nr_to_write); |
2792 | percpu_up_read(&sbi->s_journal_flag_rwsem); | ||
2779 | return ret; | 2793 | return ret; |
2780 | } | 2794 | } |
2781 | 2795 | ||
@@ -3215,75 +3229,52 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3215 | } | 3229 | } |
3216 | 3230 | ||
3217 | #ifdef CONFIG_FS_DAX | 3231 | #ifdef CONFIG_FS_DAX |
3218 | int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, | 3232 | /* |
3219 | struct buffer_head *bh_result, int create) | 3233 | * Get block function for DAX IO and mmap faults. It takes care of converting |
3234 | * unwritten extents to written ones and initializes new / converted blocks | ||
3235 | * to zeros. | ||
3236 | */ | ||
3237 | int ext4_dax_get_block(struct inode *inode, sector_t iblock, | ||
3238 | struct buffer_head *bh_result, int create) | ||
3220 | { | 3239 | { |
3221 | int ret, err; | 3240 | int ret; |
3222 | int credits; | ||
3223 | struct ext4_map_blocks map; | ||
3224 | handle_t *handle = NULL; | ||
3225 | int flags = 0; | ||
3226 | |||
3227 | ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n", | ||
3228 | inode->i_ino, create); | ||
3229 | map.m_lblk = iblock; | ||
3230 | map.m_len = bh_result->b_size >> inode->i_blkbits; | ||
3231 | credits = ext4_chunk_trans_blocks(inode, map.m_len); | ||
3232 | if (create) { | ||
3233 | flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO; | ||
3234 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | ||
3235 | if (IS_ERR(handle)) { | ||
3236 | ret = PTR_ERR(handle); | ||
3237 | return ret; | ||
3238 | } | ||
3239 | } | ||
3240 | 3241 | ||
3241 | ret = ext4_map_blocks(handle, inode, &map, flags); | 3242 | ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create); |
3242 | if (create) { | 3243 | if (!create) |
3243 | err = ext4_journal_stop(handle); | 3244 | return _ext4_get_block(inode, iblock, bh_result, 0); |
3244 | if (ret >= 0 && err < 0) | ||
3245 | ret = err; | ||
3246 | } | ||
3247 | if (ret <= 0) | ||
3248 | goto out; | ||
3249 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | ||
3250 | int err2; | ||
3251 | 3245 | ||
3252 | /* | 3246 | ret = ext4_get_block_trans(inode, iblock, bh_result, |
3253 | * We are protected by i_mmap_sem so we know block cannot go | 3247 | EXT4_GET_BLOCKS_PRE_IO | |
3254 | * away from under us even though we dropped i_data_sem. | 3248 | EXT4_GET_BLOCKS_CREATE_ZERO); |
3255 | * Convert extent to written and write zeros there. | 3249 | if (ret < 0) |
3256 | * | 3250 | return ret; |
3257 | * Note: We may get here even when create == 0. | ||
3258 | */ | ||
3259 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | ||
3260 | if (IS_ERR(handle)) { | ||
3261 | ret = PTR_ERR(handle); | ||
3262 | goto out; | ||
3263 | } | ||
3264 | 3251 | ||
3265 | err = ext4_map_blocks(handle, inode, &map, | 3252 | if (buffer_unwritten(bh_result)) { |
3266 | EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO); | ||
3267 | if (err < 0) | ||
3268 | ret = err; | ||
3269 | err2 = ext4_journal_stop(handle); | ||
3270 | if (err2 < 0 && ret > 0) | ||
3271 | ret = err2; | ||
3272 | } | ||
3273 | out: | ||
3274 | WARN_ON_ONCE(ret == 0 && create); | ||
3275 | if (ret > 0) { | ||
3276 | map_bh(bh_result, inode->i_sb, map.m_pblk); | ||
3277 | /* | 3253 | /* |
3278 | * At least for now we have to clear BH_New so that DAX code | 3254 | * We are protected by i_mmap_sem or i_mutex so we know block |
3279 | * doesn't attempt to zero blocks again in a racy way. | 3255 | * cannot go away from under us even though we dropped |
3256 | * i_data_sem. Convert extent to written and write zeros there. | ||
3280 | */ | 3257 | */ |
3281 | map.m_flags &= ~EXT4_MAP_NEW; | 3258 | ret = ext4_get_block_trans(inode, iblock, bh_result, |
3282 | ext4_update_bh_state(bh_result, map.m_flags); | 3259 | EXT4_GET_BLOCKS_CONVERT | |
3283 | bh_result->b_size = map.m_len << inode->i_blkbits; | 3260 | EXT4_GET_BLOCKS_CREATE_ZERO); |
3284 | ret = 0; | 3261 | if (ret < 0) |
3262 | return ret; | ||
3285 | } | 3263 | } |
3286 | return ret; | 3264 | /* |
3265 | * At least for now we have to clear BH_New so that DAX code | ||
3266 | * doesn't attempt to zero blocks again in a racy way. | ||
3267 | */ | ||
3268 | clear_buffer_new(bh_result); | ||
3269 | return 0; | ||
3270 | } | ||
3271 | #else | ||
3272 | /* Just define empty function, it will never get called. */ | ||
3273 | int ext4_dax_get_block(struct inode *inode, sector_t iblock, | ||
3274 | struct buffer_head *bh_result, int create) | ||
3275 | { | ||
3276 | BUG(); | ||
3277 | return 0; | ||
3287 | } | 3278 | } |
3288 | #endif | 3279 | #endif |
3289 | 3280 | ||
@@ -3316,7 +3307,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3316 | } | 3307 | } |
3317 | 3308 | ||
3318 | /* | 3309 | /* |
3319 | * For ext4 extent files, ext4 will do direct-io write to holes, | 3310 | * Handling of direct IO writes. |
3311 | * | ||
3312 | * For ext4 extent files, ext4 will do direct-io write even to holes, | ||
3320 | * preallocated extents, and those write extend the file, no need to | 3313 | * preallocated extents, and those write extend the file, no need to |
3321 | * fall back to buffered IO. | 3314 | * fall back to buffered IO. |
3322 | * | 3315 | * |
@@ -3334,10 +3327,11 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3334 | * if the machine crashes during the write. | 3327 | * if the machine crashes during the write. |
3335 | * | 3328 | * |
3336 | */ | 3329 | */ |
3337 | static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | 3330 | static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) |
3338 | { | 3331 | { |
3339 | struct file *file = iocb->ki_filp; | 3332 | struct file *file = iocb->ki_filp; |
3340 | struct inode *inode = file->f_mapping->host; | 3333 | struct inode *inode = file->f_mapping->host; |
3334 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3341 | ssize_t ret; | 3335 | ssize_t ret; |
3342 | loff_t offset = iocb->ki_pos; | 3336 | loff_t offset = iocb->ki_pos; |
3343 | size_t count = iov_iter_count(iter); | 3337 | size_t count = iov_iter_count(iter); |
@@ -3345,10 +3339,25 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3345 | get_block_t *get_block_func = NULL; | 3339 | get_block_t *get_block_func = NULL; |
3346 | int dio_flags = 0; | 3340 | int dio_flags = 0; |
3347 | loff_t final_size = offset + count; | 3341 | loff_t final_size = offset + count; |
3342 | int orphan = 0; | ||
3343 | handle_t *handle; | ||
3348 | 3344 | ||
3349 | /* Use the old path for reads and writes beyond i_size. */ | 3345 | if (final_size > inode->i_size) { |
3350 | if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) | 3346 | /* Credits for sb + inode write */ |
3351 | return ext4_ind_direct_IO(iocb, iter); | 3347 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); |
3348 | if (IS_ERR(handle)) { | ||
3349 | ret = PTR_ERR(handle); | ||
3350 | goto out; | ||
3351 | } | ||
3352 | ret = ext4_orphan_add(handle, inode); | ||
3353 | if (ret) { | ||
3354 | ext4_journal_stop(handle); | ||
3355 | goto out; | ||
3356 | } | ||
3357 | orphan = 1; | ||
3358 | ei->i_disksize = inode->i_size; | ||
3359 | ext4_journal_stop(handle); | ||
3360 | } | ||
3352 | 3361 | ||
3353 | BUG_ON(iocb->private == NULL); | 3362 | BUG_ON(iocb->private == NULL); |
3354 | 3363 | ||
@@ -3357,8 +3366,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3357 | * conversion. This also disallows race between truncate() and | 3366 | * conversion. This also disallows race between truncate() and |
3358 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | 3367 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. |
3359 | */ | 3368 | */ |
3360 | if (iov_iter_rw(iter) == WRITE) | 3369 | inode_dio_begin(inode); |
3361 | inode_dio_begin(inode); | ||
3362 | 3370 | ||
3363 | /* If we do a overwrite dio, i_mutex locking can be released */ | 3371 | /* If we do a overwrite dio, i_mutex locking can be released */ |
3364 | overwrite = *((int *)iocb->private); | 3372 | overwrite = *((int *)iocb->private); |
@@ -3367,7 +3375,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3367 | inode_unlock(inode); | 3375 | inode_unlock(inode); |
3368 | 3376 | ||
3369 | /* | 3377 | /* |
3370 | * We could direct write to holes and fallocate. | 3378 | * For extent mapped files we could direct write to holes and fallocate. |
3371 | * | 3379 | * |
3372 | * Allocated blocks to fill the hole are marked as unwritten to prevent | 3380 | * Allocated blocks to fill the hole are marked as unwritten to prevent |
3373 | * parallel buffered read to expose the stale data before DIO complete | 3381 | * parallel buffered read to expose the stale data before DIO complete |
@@ -3389,7 +3397,23 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3389 | iocb->private = NULL; | 3397 | iocb->private = NULL; |
3390 | if (overwrite) | 3398 | if (overwrite) |
3391 | get_block_func = ext4_dio_get_block_overwrite; | 3399 | get_block_func = ext4_dio_get_block_overwrite; |
3392 | else if (is_sync_kiocb(iocb)) { | 3400 | else if (IS_DAX(inode)) { |
3401 | /* | ||
3402 | * We can avoid zeroing for aligned DAX writes beyond EOF. Other | ||
3403 | * writes need zeroing either because they can race with page | ||
3404 | * faults or because they use partial blocks. | ||
3405 | */ | ||
3406 | if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size && | ||
3407 | ext4_aligned_io(inode, offset, count)) | ||
3408 | get_block_func = ext4_dio_get_block; | ||
3409 | else | ||
3410 | get_block_func = ext4_dax_get_block; | ||
3411 | dio_flags = DIO_LOCKING; | ||
3412 | } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || | ||
3413 | round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) { | ||
3414 | get_block_func = ext4_dio_get_block; | ||
3415 | dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; | ||
3416 | } else if (is_sync_kiocb(iocb)) { | ||
3393 | get_block_func = ext4_dio_get_block_unwritten_sync; | 3417 | get_block_func = ext4_dio_get_block_unwritten_sync; |
3394 | dio_flags = DIO_LOCKING; | 3418 | dio_flags = DIO_LOCKING; |
3395 | } else { | 3419 | } else { |
@@ -3399,10 +3423,10 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3399 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | 3423 | #ifdef CONFIG_EXT4_FS_ENCRYPTION |
3400 | BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); | 3424 | BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); |
3401 | #endif | 3425 | #endif |
3402 | if (IS_DAX(inode)) | 3426 | if (IS_DAX(inode)) { |
3403 | ret = dax_do_io(iocb, inode, iter, get_block_func, | 3427 | ret = dax_do_io(iocb, inode, iter, get_block_func, |
3404 | ext4_end_io_dio, dio_flags); | 3428 | ext4_end_io_dio, dio_flags); |
3405 | else | 3429 | } else |
3406 | ret = __blockdev_direct_IO(iocb, inode, | 3430 | ret = __blockdev_direct_IO(iocb, inode, |
3407 | inode->i_sb->s_bdev, iter, | 3431 | inode->i_sb->s_bdev, iter, |
3408 | get_block_func, | 3432 | get_block_func, |
@@ -3422,12 +3446,86 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3422 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3446 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3423 | } | 3447 | } |
3424 | 3448 | ||
3425 | if (iov_iter_rw(iter) == WRITE) | 3449 | inode_dio_end(inode); |
3426 | inode_dio_end(inode); | ||
3427 | /* take i_mutex locking again if we do a ovewrite dio */ | 3450 | /* take i_mutex locking again if we do a ovewrite dio */ |
3428 | if (overwrite) | 3451 | if (overwrite) |
3429 | inode_lock(inode); | 3452 | inode_lock(inode); |
3430 | 3453 | ||
3454 | if (ret < 0 && final_size > inode->i_size) | ||
3455 | ext4_truncate_failed_write(inode); | ||
3456 | |||
3457 | /* Handle extending of i_size after direct IO write */ | ||
3458 | if (orphan) { | ||
3459 | int err; | ||
3460 | |||
3461 | /* Credits for sb + inode write */ | ||
3462 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
3463 | if (IS_ERR(handle)) { | ||
3464 | /* This is really bad luck. We've written the data | ||
3465 | * but cannot extend i_size. Bail out and pretend | ||
3466 | * the write failed... */ | ||
3467 | ret = PTR_ERR(handle); | ||
3468 | if (inode->i_nlink) | ||
3469 | ext4_orphan_del(NULL, inode); | ||
3470 | |||
3471 | goto out; | ||
3472 | } | ||
3473 | if (inode->i_nlink) | ||
3474 | ext4_orphan_del(handle, inode); | ||
3475 | if (ret > 0) { | ||
3476 | loff_t end = offset + ret; | ||
3477 | if (end > inode->i_size) { | ||
3478 | ei->i_disksize = end; | ||
3479 | i_size_write(inode, end); | ||
3480 | /* | ||
3481 | * We're going to return a positive `ret' | ||
3482 | * here due to non-zero-length I/O, so there's | ||
3483 | * no way of reporting error returns from | ||
3484 | * ext4_mark_inode_dirty() to userspace. So | ||
3485 | * ignore it. | ||
3486 | */ | ||
3487 | ext4_mark_inode_dirty(handle, inode); | ||
3488 | } | ||
3489 | } | ||
3490 | err = ext4_journal_stop(handle); | ||
3491 | if (ret == 0) | ||
3492 | ret = err; | ||
3493 | } | ||
3494 | out: | ||
3495 | return ret; | ||
3496 | } | ||
3497 | |||
3498 | static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) | ||
3499 | { | ||
3500 | int unlocked = 0; | ||
3501 | struct inode *inode = iocb->ki_filp->f_mapping->host; | ||
3502 | ssize_t ret; | ||
3503 | |||
3504 | if (ext4_should_dioread_nolock(inode)) { | ||
3505 | /* | ||
3506 | * Nolock dioread optimization may be dynamically disabled | ||
3507 | * via ext4_inode_block_unlocked_dio(). Check inode's state | ||
3508 | * while holding extra i_dio_count ref. | ||
3509 | */ | ||
3510 | inode_dio_begin(inode); | ||
3511 | smp_mb(); | ||
3512 | if (unlikely(ext4_test_inode_state(inode, | ||
3513 | EXT4_STATE_DIOREAD_LOCK))) | ||
3514 | inode_dio_end(inode); | ||
3515 | else | ||
3516 | unlocked = 1; | ||
3517 | } | ||
3518 | if (IS_DAX(inode)) { | ||
3519 | ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, | ||
3520 | NULL, unlocked ? 0 : DIO_LOCKING); | ||
3521 | } else { | ||
3522 | ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, | ||
3523 | iter, ext4_dio_get_block, | ||
3524 | NULL, NULL, | ||
3525 | unlocked ? 0 : DIO_LOCKING); | ||
3526 | } | ||
3527 | if (unlocked) | ||
3528 | inode_dio_end(inode); | ||
3431 | return ret; | 3529 | return ret; |
3432 | } | 3530 | } |
3433 | 3531 | ||
@@ -3455,10 +3553,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
3455 | return 0; | 3553 | return 0; |
3456 | 3554 | ||
3457 | trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); | 3555 | trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); |
3458 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3556 | if (iov_iter_rw(iter) == READ) |
3459 | ret = ext4_ext_direct_IO(iocb, iter); | 3557 | ret = ext4_direct_IO_read(iocb, iter); |
3460 | else | 3558 | else |
3461 | ret = ext4_ind_direct_IO(iocb, iter); | 3559 | ret = ext4_direct_IO_write(iocb, iter); |
3462 | trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); | 3560 | trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); |
3463 | return ret; | 3561 | return ret; |
3464 | } | 3562 | } |
@@ -3534,10 +3632,7 @@ void ext4_set_aops(struct inode *inode) | |||
3534 | { | 3632 | { |
3535 | switch (ext4_inode_journal_mode(inode)) { | 3633 | switch (ext4_inode_journal_mode(inode)) { |
3536 | case EXT4_INODE_ORDERED_DATA_MODE: | 3634 | case EXT4_INODE_ORDERED_DATA_MODE: |
3537 | ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE); | ||
3538 | break; | ||
3539 | case EXT4_INODE_WRITEBACK_DATA_MODE: | 3635 | case EXT4_INODE_WRITEBACK_DATA_MODE: |
3540 | ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE); | ||
3541 | break; | 3636 | break; |
3542 | case EXT4_INODE_JOURNAL_DATA_MODE: | 3637 | case EXT4_INODE_JOURNAL_DATA_MODE: |
3543 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3638 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
@@ -3630,8 +3725,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, | |||
3630 | } else { | 3725 | } else { |
3631 | err = 0; | 3726 | err = 0; |
3632 | mark_buffer_dirty(bh); | 3727 | mark_buffer_dirty(bh); |
3633 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) | 3728 | if (ext4_should_order_data(inode)) |
3634 | err = ext4_jbd2_file_inode(handle, inode); | 3729 | err = ext4_jbd2_inode_add_write(handle, inode); |
3635 | } | 3730 | } |
3636 | 3731 | ||
3637 | unlock: | 3732 | unlock: |
@@ -5429,6 +5524,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5429 | journal_t *journal; | 5524 | journal_t *journal; |
5430 | handle_t *handle; | 5525 | handle_t *handle; |
5431 | int err; | 5526 | int err; |
5527 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
5432 | 5528 | ||
5433 | /* | 5529 | /* |
5434 | * We have to be very careful here: changing a data block's | 5530 | * We have to be very careful here: changing a data block's |
@@ -5445,22 +5541,30 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5445 | return 0; | 5541 | return 0; |
5446 | if (is_journal_aborted(journal)) | 5542 | if (is_journal_aborted(journal)) |
5447 | return -EROFS; | 5543 | return -EROFS; |
5448 | /* We have to allocate physical blocks for delalloc blocks | ||
5449 | * before flushing journal. otherwise delalloc blocks can not | ||
5450 | * be allocated any more. even more truncate on delalloc blocks | ||
5451 | * could trigger BUG by flushing delalloc blocks in journal. | ||
5452 | * There is no delalloc block in non-journal data mode. | ||
5453 | */ | ||
5454 | if (val && test_opt(inode->i_sb, DELALLOC)) { | ||
5455 | err = ext4_alloc_da_blocks(inode); | ||
5456 | if (err < 0) | ||
5457 | return err; | ||
5458 | } | ||
5459 | 5544 | ||
5460 | /* Wait for all existing dio workers */ | 5545 | /* Wait for all existing dio workers */ |
5461 | ext4_inode_block_unlocked_dio(inode); | 5546 | ext4_inode_block_unlocked_dio(inode); |
5462 | inode_dio_wait(inode); | 5547 | inode_dio_wait(inode); |
5463 | 5548 | ||
5549 | /* | ||
5550 | * Before flushing the journal and switching inode's aops, we have | ||
5551 | * to flush all dirty data the inode has. There can be outstanding | ||
5552 | * delayed allocations, there can be unwritten extents created by | ||
5553 | * fallocate or buffered writes in dioread_nolock mode covered by | ||
5554 | * dirty data which can be converted only after flushing the dirty | ||
5555 | * data (and journalled aops don't know how to handle these cases). | ||
5556 | */ | ||
5557 | if (val) { | ||
5558 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
5559 | err = filemap_write_and_wait(inode->i_mapping); | ||
5560 | if (err < 0) { | ||
5561 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
5562 | ext4_inode_resume_unlocked_dio(inode); | ||
5563 | return err; | ||
5564 | } | ||
5565 | } | ||
5566 | |||
5567 | percpu_down_write(&sbi->s_journal_flag_rwsem); | ||
5464 | jbd2_journal_lock_updates(journal); | 5568 | jbd2_journal_lock_updates(journal); |
5465 | 5569 | ||
5466 | /* | 5570 | /* |
@@ -5477,6 +5581,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5477 | err = jbd2_journal_flush(journal); | 5581 | err = jbd2_journal_flush(journal); |
5478 | if (err < 0) { | 5582 | if (err < 0) { |
5479 | jbd2_journal_unlock_updates(journal); | 5583 | jbd2_journal_unlock_updates(journal); |
5584 | percpu_up_write(&sbi->s_journal_flag_rwsem); | ||
5480 | ext4_inode_resume_unlocked_dio(inode); | 5585 | ext4_inode_resume_unlocked_dio(inode); |
5481 | return err; | 5586 | return err; |
5482 | } | 5587 | } |
@@ -5485,6 +5590,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5485 | ext4_set_aops(inode); | 5590 | ext4_set_aops(inode); |
5486 | 5591 | ||
5487 | jbd2_journal_unlock_updates(journal); | 5592 | jbd2_journal_unlock_updates(journal); |
5593 | percpu_up_write(&sbi->s_journal_flag_rwsem); | ||
5594 | |||
5595 | if (val) | ||
5596 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
5488 | ext4_inode_resume_unlocked_dio(inode); | 5597 | ext4_inode_resume_unlocked_dio(inode); |
5489 | 5598 | ||
5490 | /* Finally we can mark the inode as dirty. */ | 5599 | /* Finally we can mark the inode as dirty. */ |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7497f50cb293..28cc412852af 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -365,7 +365,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) | |||
365 | struct dquot *transfer_to[MAXQUOTAS] = { }; | 365 | struct dquot *transfer_to[MAXQUOTAS] = { }; |
366 | 366 | ||
367 | transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); | 367 | transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); |
368 | if (transfer_to[PRJQUOTA]) { | 368 | if (!IS_ERR(transfer_to[PRJQUOTA])) { |
369 | err = __dquot_transfer(inode, transfer_to); | 369 | err = __dquot_transfer(inode, transfer_to); |
370 | dqput(transfer_to[PRJQUOTA]); | 370 | dqput(transfer_to[PRJQUOTA]); |
371 | if (err) | 371 | if (err) |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index eeeade76012e..c1ab3ec30423 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -1266,6 +1266,7 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) | |||
1266 | static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | 1266 | static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) |
1267 | { | 1267 | { |
1268 | int order = 1; | 1268 | int order = 1; |
1269 | int bb_incr = 1 << (e4b->bd_blkbits - 1); | ||
1269 | void *bb; | 1270 | void *bb; |
1270 | 1271 | ||
1271 | BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); | 1272 | BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); |
@@ -1278,7 +1279,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | |||
1278 | /* this block is part of buddy of order 'order' */ | 1279 | /* this block is part of buddy of order 'order' */ |
1279 | return order; | 1280 | return order; |
1280 | } | 1281 | } |
1281 | bb += 1 << (e4b->bd_blkbits - order); | 1282 | bb += bb_incr; |
1283 | bb_incr >>= 1; | ||
1282 | order++; | 1284 | order++; |
1283 | } | 1285 | } |
1284 | return 0; | 1286 | return 0; |
@@ -2583,7 +2585,7 @@ int ext4_mb_init(struct super_block *sb) | |||
2583 | { | 2585 | { |
2584 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2586 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2585 | unsigned i, j; | 2587 | unsigned i, j; |
2586 | unsigned offset; | 2588 | unsigned offset, offset_incr; |
2587 | unsigned max; | 2589 | unsigned max; |
2588 | int ret; | 2590 | int ret; |
2589 | 2591 | ||
@@ -2612,11 +2614,13 @@ int ext4_mb_init(struct super_block *sb) | |||
2612 | 2614 | ||
2613 | i = 1; | 2615 | i = 1; |
2614 | offset = 0; | 2616 | offset = 0; |
2617 | offset_incr = 1 << (sb->s_blocksize_bits - 1); | ||
2615 | max = sb->s_blocksize << 2; | 2618 | max = sb->s_blocksize << 2; |
2616 | do { | 2619 | do { |
2617 | sbi->s_mb_offsets[i] = offset; | 2620 | sbi->s_mb_offsets[i] = offset; |
2618 | sbi->s_mb_maxs[i] = max; | 2621 | sbi->s_mb_maxs[i] = max; |
2619 | offset += 1 << (sb->s_blocksize_bits - i); | 2622 | offset += offset_incr; |
2623 | offset_incr = offset_incr >> 1; | ||
2620 | max = max >> 1; | 2624 | max = max >> 1; |
2621 | i++; | 2625 | i++; |
2622 | } while (i <= sb->s_blocksize_bits + 1); | 2626 | } while (i <= sb->s_blocksize_bits + 1); |
@@ -4935,7 +4939,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
4935 | * boundary. | 4939 | * boundary. |
4936 | */ | 4940 | */ |
4937 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { | 4941 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { |
4938 | ext4_warning(sb, "too much blocks added to group %u\n", | 4942 | ext4_warning(sb, "too much blocks added to group %u", |
4939 | block_group); | 4943 | block_group); |
4940 | err = -EINVAL; | 4944 | err = -EINVAL; |
4941 | goto error_return; | 4945 | goto error_return; |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 24445275d330..23d436d6f8b8 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -121,7 +121,7 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, | |||
121 | __ext4_warning(sb, function, line, "%s", msg); | 121 | __ext4_warning(sb, function, line, "%s", msg); |
122 | __ext4_warning(sb, function, line, | 122 | __ext4_warning(sb, function, line, |
123 | "MMP failure info: last update time: %llu, last update " | 123 | "MMP failure info: last update time: %llu, last update " |
124 | "node: %s, last update device: %s\n", | 124 | "node: %s, last update device: %s", |
125 | (long long unsigned int) le64_to_cpu(mmp->mmp_time), | 125 | (long long unsigned int) le64_to_cpu(mmp->mmp_time), |
126 | mmp->mmp_nodename, mmp->mmp_bdevname); | 126 | mmp->mmp_nodename, mmp->mmp_bdevname); |
127 | } | 127 | } |
@@ -353,7 +353,7 @@ skip: | |||
353 | * wait for MMP interval and check mmp_seq. | 353 | * wait for MMP interval and check mmp_seq. |
354 | */ | 354 | */ |
355 | if (schedule_timeout_interruptible(HZ * wait_time) != 0) { | 355 | if (schedule_timeout_interruptible(HZ * wait_time) != 0) { |
356 | ext4_warning(sb, "MMP startup interrupted, failing mount\n"); | 356 | ext4_warning(sb, "MMP startup interrupted, failing mount"); |
357 | goto failed; | 357 | goto failed; |
358 | } | 358 | } |
359 | 359 | ||
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 325cef48b39a..a920c5d29fac 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -400,7 +400,7 @@ data_copy: | |||
400 | 400 | ||
401 | /* Even in case of data=writeback it is reasonable to pin | 401 | /* Even in case of data=writeback it is reasonable to pin |
402 | * inode to transaction, to prevent unexpected data loss */ | 402 | * inode to transaction, to prevent unexpected data loss */ |
403 | *err = ext4_jbd2_file_inode(handle, orig_inode); | 403 | *err = ext4_jbd2_inode_add_write(handle, orig_inode); |
404 | 404 | ||
405 | unlock_pages: | 405 | unlock_pages: |
406 | unlock_page(pagep[0]); | 406 | unlock_page(pagep[0]); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5611ec9348d7..ec4c39952e84 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1107,6 +1107,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
1107 | } | 1107 | } |
1108 | 1108 | ||
1109 | while (1) { | 1109 | while (1) { |
1110 | if (fatal_signal_pending(current)) { | ||
1111 | err = -ERESTARTSYS; | ||
1112 | goto errout; | ||
1113 | } | ||
1114 | cond_resched(); | ||
1110 | block = dx_get_block(frame->at); | 1115 | block = dx_get_block(frame->at); |
1111 | ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, | 1116 | ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, |
1112 | start_hash, start_minor_hash); | 1117 | start_hash, start_minor_hash); |
@@ -1613,7 +1618,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
1613 | if (nokey) | 1618 | if (nokey) |
1614 | return ERR_PTR(-ENOKEY); | 1619 | return ERR_PTR(-ENOKEY); |
1615 | ext4_warning(inode->i_sb, | 1620 | ext4_warning(inode->i_sb, |
1616 | "Inconsistent encryption contexts: %lu/%lu\n", | 1621 | "Inconsistent encryption contexts: %lu/%lu", |
1617 | (unsigned long) dir->i_ino, | 1622 | (unsigned long) dir->i_ino, |
1618 | (unsigned long) inode->i_ino); | 1623 | (unsigned long) inode->i_ino); |
1619 | return ERR_PTR(-EPERM); | 1624 | return ERR_PTR(-EPERM); |
@@ -2828,7 +2833,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2828 | * list entries can cause panics at unmount time. | 2833 | * list entries can cause panics at unmount time. |
2829 | */ | 2834 | */ |
2830 | mutex_lock(&sbi->s_orphan_lock); | 2835 | mutex_lock(&sbi->s_orphan_lock); |
2831 | list_del(&EXT4_I(inode)->i_orphan); | 2836 | list_del_init(&EXT4_I(inode)->i_orphan); |
2832 | mutex_unlock(&sbi->s_orphan_lock); | 2837 | mutex_unlock(&sbi->s_orphan_lock); |
2833 | } | 2838 | } |
2834 | } | 2839 | } |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index e4fc8ea45d78..2a01df9cc1c3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -342,9 +342,7 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
342 | if (bio) { | 342 | if (bio) { |
343 | int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ? | 343 | int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ? |
344 | WRITE_SYNC : WRITE; | 344 | WRITE_SYNC : WRITE; |
345 | bio_get(io->io_bio); | ||
346 | submit_bio(io_op, io->io_bio); | 345 | submit_bio(io_op, io->io_bio); |
347 | bio_put(io->io_bio); | ||
348 | } | 346 | } |
349 | io->io_bio = NULL; | 347 | io->io_bio = NULL; |
350 | } | 348 | } |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 34038e3598d5..cf681004b196 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -41,7 +41,7 @@ int ext4_resize_begin(struct super_block *sb) | |||
41 | */ | 41 | */ |
42 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 42 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
43 | ext4_warning(sb, "There are errors in the filesystem, " | 43 | ext4_warning(sb, "There are errors in the filesystem, " |
44 | "so online resizing is not allowed\n"); | 44 | "so online resizing is not allowed"); |
45 | return -EPERM; | 45 | return -EPERM; |
46 | } | 46 | } |
47 | 47 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 304c712dbe12..20c5d52253b4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb) | |||
859 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 859 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
860 | percpu_counter_destroy(&sbi->s_dirs_counter); | 860 | percpu_counter_destroy(&sbi->s_dirs_counter); |
861 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); | 861 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
862 | percpu_free_rwsem(&sbi->s_journal_flag_rwsem); | ||
862 | brelse(sbi->s_sbh); | 863 | brelse(sbi->s_sbh); |
863 | #ifdef CONFIG_QUOTA | 864 | #ifdef CONFIG_QUOTA |
864 | for (i = 0; i < EXT4_MAXQUOTAS; i++) | 865 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
@@ -3930,6 +3931,9 @@ no_journal: | |||
3930 | if (!err) | 3931 | if (!err) |
3931 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, | 3932 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, |
3932 | GFP_KERNEL); | 3933 | GFP_KERNEL); |
3934 | if (!err) | ||
3935 | err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); | ||
3936 | |||
3933 | if (err) { | 3937 | if (err) { |
3934 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3938 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
3935 | goto failed_mount6; | 3939 | goto failed_mount6; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 2ad98d6e19f4..70078096117d 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -219,6 +219,8 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
219 | 219 | ||
220 | spin_lock(&journal->j_list_lock); | 220 | spin_lock(&journal->j_list_lock); |
221 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 221 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
222 | if (!(jinode->i_flags & JI_WRITE_DATA)) | ||
223 | continue; | ||
222 | mapping = jinode->i_vfs_inode->i_mapping; | 224 | mapping = jinode->i_vfs_inode->i_mapping; |
223 | jinode->i_flags |= JI_COMMIT_RUNNING; | 225 | jinode->i_flags |= JI_COMMIT_RUNNING; |
224 | spin_unlock(&journal->j_list_lock); | 226 | spin_unlock(&journal->j_list_lock); |
@@ -256,6 +258,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, | |||
256 | /* For locking, see the comment in journal_submit_data_buffers() */ | 258 | /* For locking, see the comment in journal_submit_data_buffers() */ |
257 | spin_lock(&journal->j_list_lock); | 259 | spin_lock(&journal->j_list_lock); |
258 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 260 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
261 | if (!(jinode->i_flags & JI_WAIT_DATA)) | ||
262 | continue; | ||
259 | jinode->i_flags |= JI_COMMIT_RUNNING; | 263 | jinode->i_flags |= JI_COMMIT_RUNNING; |
260 | spin_unlock(&journal->j_list_lock); | 264 | spin_unlock(&journal->j_list_lock); |
261 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); | 265 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 435f0b26ac20..b31852f76f46 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -94,7 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page); | |||
94 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); | 94 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); |
95 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); | 95 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); |
96 | EXPORT_SYMBOL(jbd2_journal_force_commit); | 96 | EXPORT_SYMBOL(jbd2_journal_force_commit); |
97 | EXPORT_SYMBOL(jbd2_journal_file_inode); | 97 | EXPORT_SYMBOL(jbd2_journal_inode_add_write); |
98 | EXPORT_SYMBOL(jbd2_journal_inode_add_wait); | ||
98 | EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); | 99 | EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); |
99 | EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | 100 | EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); |
100 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | 101 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 2c56c3e32194..1749519b362f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -2462,7 +2462,8 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
2462 | /* | 2462 | /* |
2463 | * File inode in the inode list of the handle's transaction | 2463 | * File inode in the inode list of the handle's transaction |
2464 | */ | 2464 | */ |
2465 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | 2465 | static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, |
2466 | unsigned long flags) | ||
2466 | { | 2467 | { |
2467 | transaction_t *transaction = handle->h_transaction; | 2468 | transaction_t *transaction = handle->h_transaction; |
2468 | journal_t *journal; | 2469 | journal_t *journal; |
@@ -2487,12 +2488,14 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | |||
2487 | * and if jinode->i_next_transaction == transaction, commit code | 2488 | * and if jinode->i_next_transaction == transaction, commit code |
2488 | * will only file the inode where we want it. | 2489 | * will only file the inode where we want it. |
2489 | */ | 2490 | */ |
2490 | if (jinode->i_transaction == transaction || | 2491 | if ((jinode->i_transaction == transaction || |
2491 | jinode->i_next_transaction == transaction) | 2492 | jinode->i_next_transaction == transaction) && |
2493 | (jinode->i_flags & flags) == flags) | ||
2492 | return 0; | 2494 | return 0; |
2493 | 2495 | ||
2494 | spin_lock(&journal->j_list_lock); | 2496 | spin_lock(&journal->j_list_lock); |
2495 | 2497 | jinode->i_flags |= flags; | |
2498 | /* Is inode already attached where we need it? */ | ||
2496 | if (jinode->i_transaction == transaction || | 2499 | if (jinode->i_transaction == transaction || |
2497 | jinode->i_next_transaction == transaction) | 2500 | jinode->i_next_transaction == transaction) |
2498 | goto done; | 2501 | goto done; |
@@ -2523,6 +2526,17 @@ done: | |||
2523 | return 0; | 2526 | return 0; |
2524 | } | 2527 | } |
2525 | 2528 | ||
2529 | int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) | ||
2530 | { | ||
2531 | return jbd2_journal_file_inode(handle, jinode, | ||
2532 | JI_WRITE_DATA | JI_WAIT_DATA); | ||
2533 | } | ||
2534 | |||
2535 | int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) | ||
2536 | { | ||
2537 | return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); | ||
2538 | } | ||
2539 | |||
2526 | /* | 2540 | /* |
2527 | * File truncate and transaction commit interact with each other in a | 2541 | * File truncate and transaction commit interact with each other in a |
2528 | * non-trivial way. If a transaction writing data block A is | 2542 | * non-trivial way. If a transaction writing data block A is |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index f4cd3c3e9fb7..497a4171ef61 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -619,7 +619,7 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, | |||
619 | 619 | ||
620 | static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) | 620 | static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) |
621 | { | 621 | { |
622 | return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode); | 622 | return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode); |
623 | } | 623 | } |
624 | 624 | ||
625 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | 625 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, |
diff --git a/fs/readdir.c b/fs/readdir.c index a86c6c04b9bc..68ef06efe6bc 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -182,6 +182,8 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, | |||
182 | } | 182 | } |
183 | dirent = buf->previous; | 183 | dirent = buf->previous; |
184 | if (dirent) { | 184 | if (dirent) { |
185 | if (signal_pending(current)) | ||
186 | return -EINTR; | ||
185 | if (__put_user(offset, &dirent->d_off)) | 187 | if (__put_user(offset, &dirent->d_off)) |
186 | goto efault; | 188 | goto efault; |
187 | } | 189 | } |
@@ -261,6 +263,8 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, | |||
261 | return -EINVAL; | 263 | return -EINVAL; |
262 | dirent = buf->previous; | 264 | dirent = buf->previous; |
263 | if (dirent) { | 265 | if (dirent) { |
266 | if (signal_pending(current)) | ||
267 | return -EINTR; | ||
264 | if (__put_user(offset, &dirent->d_off)) | 268 | if (__put_user(offset, &dirent->d_off)) |
265 | goto efault; | 269 | goto efault; |
266 | } | 270 | } |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd1083c46c61..efb232c5f668 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -403,11 +403,19 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |||
403 | 403 | ||
404 | /* Flags in jbd_inode->i_flags */ | 404 | /* Flags in jbd_inode->i_flags */ |
405 | #define __JI_COMMIT_RUNNING 0 | 405 | #define __JI_COMMIT_RUNNING 0 |
406 | /* Commit of the inode data in progress. We use this flag to protect us from | 406 | #define __JI_WRITE_DATA 1 |
407 | #define __JI_WAIT_DATA 2 | ||
408 | |||
409 | /* | ||
410 | * Commit of the inode data in progress. We use this flag to protect us from | ||
407 | * concurrent deletion of inode. We cannot use reference to inode for this | 411 | * concurrent deletion of inode. We cannot use reference to inode for this |
408 | * since we cannot afford doing last iput() on behalf of kjournald | 412 | * since we cannot afford doing last iput() on behalf of kjournald |
409 | */ | 413 | */ |
410 | #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) | 414 | #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) |
415 | /* Write allocated dirty buffers in this inode before commit */ | ||
416 | #define JI_WRITE_DATA (1 << __JI_WRITE_DATA) | ||
417 | /* Wait for outstanding data writes for this inode before commit */ | ||
418 | #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) | ||
411 | 419 | ||
412 | /** | 420 | /** |
413 | * struct jbd_inode is the structure linking inodes in ordered mode | 421 | * struct jbd_inode is the structure linking inodes in ordered mode |
@@ -781,9 +789,6 @@ jbd2_time_diff(unsigned long start, unsigned long end) | |||
781 | * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the | 789 | * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the |
782 | * number that will fit in j_blocksize | 790 | * number that will fit in j_blocksize |
783 | * @j_last_sync_writer: most recent pid which did a synchronous write | 791 | * @j_last_sync_writer: most recent pid which did a synchronous write |
784 | * @j_history: Buffer storing the transactions statistics history | ||
785 | * @j_history_max: Maximum number of transactions in the statistics history | ||
786 | * @j_history_cur: Current number of transactions in the statistics history | ||
787 | * @j_history_lock: Protect the transactions statistics history | 792 | * @j_history_lock: Protect the transactions statistics history |
788 | * @j_proc_entry: procfs entry for the jbd statistics directory | 793 | * @j_proc_entry: procfs entry for the jbd statistics directory |
789 | * @j_stats: Overall statistics | 794 | * @j_stats: Overall statistics |
@@ -1270,7 +1275,8 @@ extern int jbd2_journal_clear_err (journal_t *); | |||
1270 | extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); | 1275 | extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); |
1271 | extern int jbd2_journal_force_commit(journal_t *); | 1276 | extern int jbd2_journal_force_commit(journal_t *); |
1272 | extern int jbd2_journal_force_commit_nested(journal_t *); | 1277 | extern int jbd2_journal_force_commit_nested(journal_t *); |
1273 | extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); | 1278 | extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); |
1279 | extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); | ||
1274 | extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, | 1280 | extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, |
1275 | struct jbd2_inode *inode, loff_t new_size); | 1281 | struct jbd2_inode *inode, loff_t new_size); |
1276 | extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); | 1282 | extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); |
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index f231e0bb311c..bec0b647f9cc 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c | |||
@@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) | |||
37 | free_percpu(brw->fast_read_ctr); | 37 | free_percpu(brw->fast_read_ctr); |
38 | brw->fast_read_ctr = NULL; /* catch use after free bugs */ | 38 | brw->fast_read_ctr = NULL; /* catch use after free bugs */ |
39 | } | 39 | } |
40 | EXPORT_SYMBOL_GPL(percpu_free_rwsem); | ||
40 | 41 | ||
41 | /* | 42 | /* |
42 | * This is the fast-path for down_read/up_read. If it succeeds we rely | 43 | * This is the fast-path for down_read/up_read. If it succeeds we rely |