64 files changed, 1287 insertions, 910 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 545633d6663d..ca6f5f7a4752 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8364,6 +8364,7 @@ F:	drivers/media/dvb-frontends/mn88473*
 MODULE SUPPORT
 M:	Jessica Yu <jeyu@redhat.com>
 M:	Rusty Russell <rusty@rustcorp.com.au>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
 S:	Maintained
 F:	include/linux/module.h
 F:	kernel/module.c
@@ -9996,6 +9997,14 @@ S:	Supported
 F:	Documentation/preempt-locking.txt
 F:	include/linux/preempt.h
 
+PRINTK
+M:	Petr Mladek <pmladek@suse.com>
+M:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+R:	Steven Rostedt <rostedt@goodmis.org>
+S:	Maintained
+F:	kernel/printk/
+F:	include/linux/printk.h
+
 PRISM54 WIRELESS DRIVER
 M:	"Luis R. Rodriguez" <mcgrof@gmail.com>
 L:	linux-wireless@vger.kernel.org
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1079,7 +1079,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
  */
 ssize_t
 dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = mapping->host;
@@ -1127,7 +1127,7 @@ static int dax_fault_return(int error)
  * necessary locking for the page fault to proceed successfully.
  */
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			struct iomap_ops *ops)
+			const struct iomap_ops *ops)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
@@ -1326,7 +1326,7 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
 }
 
 int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+		pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	unsigned long pmd_addr = address & PMD_MASK;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 37e2be784ac7..5e64de9c5093 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,7 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
-extern struct iomap_ops ext2_iomap_ops;
+extern const struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f073bfca694b..128cce540645 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -842,13 +842,13 @@ ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 	return 0;
 }
 
-struct iomap_ops ext2_iomap_ops = {
+const struct iomap_ops ext2_iomap_ops = {
 	.iomap_begin		= ext2_iomap_begin,
 	.iomap_end		= ext2_iomap_end,
 };
 #else
 /* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
-struct iomap_ops ext2_iomap_ops;
+const struct iomap_ops ext2_iomap_ops;
 #endif /* CONFIG_FS_DAX */
 
 int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 01d52b98f9a7..cee23b684f47 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3244,7 +3244,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 	}
 }
 
-extern struct iomap_ops ext4_iomap_ops;
+extern const struct iomap_ops ext4_iomap_ops;
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f622d4a577e3..75212a6e69f8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3450,7 +3450,7 @@ orphan_del:
 	return ret;
 }
 
-struct iomap_ops ext4_iomap_ops = {
+const struct iomap_ops ext4_iomap_ops = {
 	.iomap_begin		= ext4_iomap_begin,
 	.iomap_end		= ext4_iomap_end,
 };
diff --git a/fs/internal.h b/fs/internal.h
index b63cf3af2dc2..11c6d89dce9c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -182,7 +182,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
 		void *data, struct iomap *iomap);
 
 loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
-		unsigned flags, struct iomap_ops *ops, void *data,
+		unsigned flags, const struct iomap_ops *ops, void *data,
 		iomap_actor_t actor);
 
 /* direct-io.c: */
diff --git a/fs/iomap.c b/fs/iomap.c
index a51cb4c07d4d..d89f70bbb952 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -41,7 +41,7 @@
  */
 loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
-		struct iomap_ops *ops, void *data, iomap_actor_t actor)
+		const struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
 	struct iomap iomap = { 0 };
 	loff_t written = 0, ret;
@@ -235,7 +235,7 @@ again:
 
 ssize_t
 iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	loff_t pos = iocb->ki_pos, ret = 0, written = 0;
@@ -318,7 +318,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 int
 iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	loff_t ret;
 
@@ -398,7 +398,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
 
 int
 iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	loff_t ret;
 
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(iomap_zero_range);
 
 int
 iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	unsigned blocksize = (1 << inode->i_blkbits);
 	unsigned off = pos & (blocksize - 1);
@@ -446,7 +446,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 }
 
 int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	struct page *page = vmf->page;
 	struct inode *inode = file_inode(vma->vm_file);
@@ -545,7 +545,7 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 }
 
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
-		loff_t start, loff_t len, struct iomap_ops *ops)
+		loff_t start, loff_t len, const struct iomap_ops *ops)
 {
 	struct fiemap_ctx ctx;
 	loff_t ret;
@@ -839,8 +839,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 }
 
 ssize_t
-iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
-		iomap_dio_end_io_t end_io)
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);
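The constification above runs through every iomap entry point, so a filesystem's ops table can now live in read-only data. A minimal sketch of the resulting filesystem-side pattern ("myfs" is hypothetical; the callback signatures are abridged from this kernel's struct iomap_ops):

static int
myfs_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		unsigned flags, struct iomap *iomap)
{
	/* look up or allocate blocks backing [pos, pos + length) */
	return 0;
}

static int
myfs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned flags, struct iomap *iomap)
{
	/* release anything reserved beyond "written" */
	return 0;
}

/* every iomap entry point now takes const struct iomap_ops * */
static const struct iomap_ops myfs_iomap_ops = {
	.iomap_begin	= myfs_iomap_begin,
	.iomap_end	= myfs_iomap_end,
};
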
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9f06a211e157..369adcc18c02 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -221,20 +221,22 @@ xfs_alloc_get_rec(
  * Compute aligned version of the found extent.
  * Takes alignment and min length into account.
  */
-STATIC void
+STATIC bool
 xfs_alloc_compute_aligned(
 	xfs_alloc_arg_t	*args,		/* allocation argument structure */
 	xfs_agblock_t	foundbno,	/* starting block in found extent */
 	xfs_extlen_t	foundlen,	/* length in found extent */
 	xfs_agblock_t	*resbno,	/* result block number */
-	xfs_extlen_t	*reslen)	/* result length */
+	xfs_extlen_t	*reslen,	/* result length */
+	unsigned	*busy_gen)
 {
-	xfs_agblock_t	bno;
-	xfs_extlen_t	len;
+	xfs_agblock_t	bno = foundbno;
+	xfs_extlen_t	len = foundlen;
 	xfs_extlen_t	diff;
+	bool		busy;
 
 	/* Trim busy sections out of found extent */
-	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
 
 	/*
 	 * If we have a largish extent that happens to start before min_agbno,
@@ -259,6 +261,8 @@ xfs_alloc_compute_aligned(
 		*resbno = bno;
 		*reslen = len;
 	}
+
+	return busy;
 }
 
 /*
@@ -737,10 +741,11 @@ xfs_alloc_ag_vextent_exact(
 	int		error;
 	xfs_agblock_t	fbno;	/* start block of found extent */
 	xfs_extlen_t	flen;	/* length of found extent */
-	xfs_agblock_t	tbno;	/* start block of trimmed extent */
-	xfs_extlen_t	tlen;	/* length of trimmed extent */
-	xfs_agblock_t	tend;	/* end block of trimmed extent */
+	xfs_agblock_t	tbno;	/* start block of busy extent */
+	xfs_extlen_t	tlen;	/* length of busy extent */
+	xfs_agblock_t	tend;	/* end block of busy extent */
 	int		i;	/* success/failure of operation */
+	unsigned	busy_gen;
 
 	ASSERT(args->alignment == 1);
 
@@ -773,7 +778,9 @@ xfs_alloc_ag_vextent_exact(
 	/*
 	 * Check for overlapping busy extents.
 	 */
-	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+	tbno = fbno;
+	tlen = flen;
+	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 
 	/*
 	 * Give up if the start of the extent is busy, or the freespace isn't
@@ -853,6 +860,7 @@ xfs_alloc_find_best_extent(
 	xfs_agblock_t	sdiff;
 	int		error;
 	int		i;
+	unsigned	busy_gen;
 
 	/* The good extent is perfect, no need to search. */
 	if (!gdiff)
@@ -866,7 +874,8 @@ xfs_alloc_find_best_extent(
 		if (error)
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+		xfs_alloc_compute_aligned(args, *sbno, *slen,
+				sbnoa, slena, &busy_gen);
 
 		/*
 		 * The good extent is closer than this one.
@@ -955,7 +964,8 @@ xfs_alloc_ag_vextent_near(
 	xfs_extlen_t	ltlena;		/* aligned ... */
 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
 	xfs_extlen_t	rlen;		/* length of returned extent */
-	int		forced = 0;
+	bool		busy;
+	unsigned	busy_gen;
 #ifdef DEBUG
 	/*
 	 * Randomly don't execute the first algorithm.
@@ -982,6 +992,7 @@ restart:
 	ltlen = 0;
 	gtlena = 0;
 	ltlena = 0;
+	busy = false;
 
 	/*
 	 * Get a cursor for the by-size btree.
@@ -1064,8 +1075,8 @@ restart:
 			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 				goto error0;
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-			xfs_alloc_compute_aligned(args, ltbno, ltlen,
-					&ltbnoa, &ltlena);
+			busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
+					&ltbnoa, &ltlena, &busy_gen);
 			if (ltlena < args->minlen)
 				continue;
 			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
@@ -1183,8 +1194,8 @@ restart:
 		if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-		xfs_alloc_compute_aligned(args, ltbno, ltlen,
-				&ltbnoa, &ltlena);
+		busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
+				&ltbnoa, &ltlena, &busy_gen);
 		if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
 			break;
 		if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1199,8 +1210,8 @@ restart:
 		if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-		xfs_alloc_compute_aligned(args, gtbno, gtlen,
-				&gtbnoa, &gtlena);
+		busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
+				&gtbnoa, &gtlena, &busy_gen);
 		if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
 			break;
 		if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1261,9 +1272,9 @@ restart:
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 
-		if (!forced++) {
+		if (busy) {
 			trace_xfs_alloc_near_busy(args);
-			xfs_log_force(args->mp, XFS_LOG_SYNC);
+			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
 			goto restart;
 		}
 		trace_xfs_alloc_size_neither(args);
@@ -1344,7 +1355,8 @@ xfs_alloc_ag_vextent_size(
 	int		i;		/* temp status variable */
 	xfs_agblock_t	rbno;		/* returned block number */
 	xfs_extlen_t	rlen;		/* length of returned extent */
-	int		forced = 0;
+	bool		busy;
+	unsigned	busy_gen;
 
 restart:
 	/*
@@ -1353,6 +1365,7 @@ restart:
 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_CNT);
 	bno_cur = NULL;
+	busy = false;
 
 	/*
 	 * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1362,14 +1375,13 @@ restart:
 		goto error0;
 
 	/*
-	 * If none or we have busy extents that we cannot allocate from, then
-	 * we have to settle for a smaller extent. In the case that there are
-	 * no large extents, this will return the last entry in the tree unless
-	 * the tree is empty. In the case that there are only busy large
-	 * extents, this will return the largest small extent unless there
+	 * If none then we have to settle for a smaller extent. In the case that
+	 * there are no large extents, this will return the last entry in the
+	 * tree unless the tree is empty. In the case that there are only busy
+	 * large extents, this will return the largest small extent unless there
 	 * are no smaller extents available.
 	 */
-	if (!i || forced > 1) {
+	if (!i) {
 		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
 				&fbno, &flen, &i);
 		if (error)
@@ -1380,13 +1392,11 @@ restart:
 			return 0;
 		}
 		ASSERT(i == 1);
-		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
+				&rlen, &busy_gen);
 	} else {
 		/*
 		 * Search for a non-busy extent that is large enough.
-		 * If we are at low space, don't check, or if we fall of
-		 * the end of the btree, turn off the busy check and
-		 * restart.
 		 */
 		for (;;) {
 			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
@@ -1394,8 +1404,8 @@ restart:
 				goto error0;
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 
-			xfs_alloc_compute_aligned(args, fbno, flen,
-					&rbno, &rlen);
+			busy = xfs_alloc_compute_aligned(args, fbno, flen,
+					&rbno, &rlen, &busy_gen);
 
 			if (rlen >= args->maxlen)
 				break;
@@ -1407,18 +1417,13 @@ restart:
 				/*
 				 * Our only valid extents must have been busy.
 				 * Make it unbusy by forcing the log out and
-				 * retrying. If we've been here before, forcing
-				 * the log isn't making the extents available,
-				 * which means they have probably been freed in
-				 * this transaction. In that case, we have to
-				 * give up on them and we'll attempt a minlen
-				 * allocation the next time around.
+				 * retrying.
 				 */
 				xfs_btree_del_cursor(cnt_cur,
 						XFS_BTREE_NOERROR);
 				trace_xfs_alloc_size_busy(args);
-				if (!forced++)
-					xfs_log_force(args->mp, XFS_LOG_SYNC);
+				xfs_extent_busy_flush(args->mp,
+						args->pag, busy_gen);
 				goto restart;
 			}
 		}
@@ -1454,8 +1459,8 @@ restart:
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			if (flen < bestrlen)
 				break;
-			xfs_alloc_compute_aligned(args, fbno, flen,
-					&rbno, &rlen);
+			busy = xfs_alloc_compute_aligned(args, fbno, flen,
+					&rbno, &rlen, &busy_gen);
 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
 			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
 				(rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1484,10 +1489,10 @@ restart:
 	 */
 	args->len = rlen;
 	if (rlen < args->minlen) {
-		if (!forced++) {
+		if (busy) {
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 			trace_xfs_alloc_size_busy(args);
-			xfs_log_force(args->mp, XFS_LOG_SYNC);
+			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
 			goto restart;
 		}
 		goto out_nominleft;
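The hunks above replace the old "force the log once, then give up" heuristic with a busy-extent generation handshake: the scan records the generation in force whenever a candidate overlapped a busy extent, and xfs_extent_busy_flush() waits only until that generation has passed before the scan restarts. A stand-alone model of the idea (a simplified user-space sketch, not kernel code):

#include <stdbool.h>
#include <stdio.h>

static unsigned busy_gen = 1;	/* advanced as busy extents drain */

/* scanning: snapshot the generation under which we saw busy extents */
static bool trim_busy_extent(unsigned *gen)
{
	*gen = busy_gen;
	return true;	/* pretend the candidate overlapped a busy extent */
}

/* flushing: wait (modeled here as a bump) until that generation passes */
static void busy_flush(unsigned seen_gen)
{
	if (busy_gen == seen_gen)
		busy_gen++;
}

int main(void)
{
	unsigned gen;
	bool busy = trim_busy_extent(&gen);

	if (busy) {
		busy_flush(gen);	/* only waits for work we actually saw */
		printf("restart scan at generation %u\n", busy_gen);
	}
	return 0;
}
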
@@ -2659,21 +2664,11 @@ xfs_alloc_vextent(
 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
 		args->type = XFS_ALLOCTYPE_NEAR_BNO;
 		/* FALLTHROUGH */
-	case XFS_ALLOCTYPE_ANY_AG:
-	case XFS_ALLOCTYPE_START_AG:
 	case XFS_ALLOCTYPE_FIRST_AG:
 		/*
 		 * Rotate through the allocation groups looking for a winner.
 		 */
-		if (type == XFS_ALLOCTYPE_ANY_AG) {
-			/*
-			 * Start with the last place we left off.
-			 */
-			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
-				mp->m_sb.sb_agcount;
-			args->type = XFS_ALLOCTYPE_THIS_AG;
-			flags = XFS_ALLOC_FLAG_TRYLOCK;
-		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
+		if (type == XFS_ALLOCTYPE_FIRST_AG) {
 			/*
 			 * Start with allocation group given by bno.
 			 */
@@ -2682,8 +2677,6 @@ xfs_alloc_vextent(
 			sagno = 0;
 			flags = 0;
 		} else {
-			if (type == XFS_ALLOCTYPE_START_AG)
-				args->type = XFS_ALLOCTYPE_THIS_AG;
 			/*
 			 * Start with the given allocation group.
 			 */
@@ -2751,7 +2744,7 @@ xfs_alloc_vextent(
 		}
 		xfs_perag_put(args->pag);
 	}
-	if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
+	if (bump_rotor) {
 		if (args->agno == sagno)
 			mp->m_agfrotor = (mp->m_agfrotor + 1) %
 				(mp->m_sb.sb_agcount * rotorstep);
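For context, the rotor that survives the removal of XFS_ALLOCTYPE_ANY_AG spreads successive rotor-based allocations across allocation groups: the starting AG is (m_agfrotor / rotorstep) % sb_agcount, and the rotor is bumped after the allocation. A toy model of that stepping (made-up values; rotorstep is, if memory serves, the fs.xfs.rotorstep sysctl):

#include <stdio.h>

int main(void)
{
	unsigned agcount = 4;	/* AGs in the filesystem */
	unsigned rotorstep = 1;	/* allocations per AG before moving on */
	unsigned agfrotor = 0;	/* per-mount rotor, m_agfrotor */

	for (int i = 0; i < 6; i++) {
		unsigned start_ag = (agfrotor / rotorstep) % agcount;

		printf("allocation %d starts in AG %u\n", i, start_ag);
		agfrotor = (agfrotor + 1) % (agcount * rotorstep);
	}
	return 0;
}
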
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 1d0f48a501a3..2a8d0fa6fbbe 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -29,9 +29,7 @@ extern struct workqueue_struct *xfs_alloc_wq;
 /*
  * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
  */
-#define XFS_ALLOCTYPE_ANY_AG	0x01	/* allocate anywhere, use rotor */
 #define XFS_ALLOCTYPE_FIRST_AG	0x02	/* ... start at ag 0 */
-#define XFS_ALLOCTYPE_START_AG	0x04	/* anywhere, start in this a.g. */
 #define XFS_ALLOCTYPE_THIS_AG	0x08	/* anywhere in this a.g. */
 #define XFS_ALLOCTYPE_START_BNO	0x10	/* near this block else anywhere */
 #define XFS_ALLOCTYPE_NEAR_BNO	0x20	/* in this a.g. and near this block */
@@ -41,9 +39,7 @@ extern struct workqueue_struct *xfs_alloc_wq;
 typedef unsigned int xfs_alloctype_t;
 
 #define XFS_ALLOC_TYPES \
-	{ XFS_ALLOCTYPE_ANY_AG,		"ANY_AG" }, \
 	{ XFS_ALLOCTYPE_FIRST_AG,	"FIRST_AG" }, \
-	{ XFS_ALLOCTYPE_START_AG,	"START_AG" }, \
 	{ XFS_ALLOCTYPE_THIS_AG,	"THIS_AG" }, \
 	{ XFS_ALLOCTYPE_START_BNO,	"START_BNO" }, \
 	{ XFS_ALLOCTYPE_NEAR_BNO,	"NEAR_BNO" }, \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index bfc00de5c6f1..a9c66d47757a 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -740,15 +740,9 @@ xfs_bmap_extents_to_btree(
 	 * Fill in the root.
 	 */
 	block = ifp->if_broot;
-	if (xfs_sb_version_hascrc(&mp->m_sb))
-		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
-			XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
-			XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
-	else
-		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
-			XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
+	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 				 XFS_BTREE_LONG_PTRS);
-
 	/*
 	 * Need a cursor.  Can't allocate until bb_level is filled in.
 	 */
@@ -804,9 +798,7 @@ try_another_ag:
 	 */
 	ASSERT(args.fsbno != NULLFSBLOCK);
 	ASSERT(*firstblock == NULLFSBLOCK ||
-	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
-	       (dfops->dop_low &&
-		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
@@ -817,13 +809,8 @@ try_another_ag:
 	 */
 	abp->b_ops = &xfs_bmbt_buf_ops;
 	ablock = XFS_BUF_TO_BLOCK(abp);
-	if (xfs_sb_version_hascrc(&mp->m_sb))
-		xfs_btree_init_block_int(mp, ablock, abp->b_bn,
-			XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
-			XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
-	else
-		xfs_btree_init_block_int(mp, ablock, abp->b_bn,
-			XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 				XFS_BTREE_LONG_PTRS);
 
 	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
@@ -1278,7 +1265,6 @@ xfs_bmap_read_extents(
 	/* REFERENCED */
 	xfs_extnum_t		room;	/* number of entries there's room for */
 
-	bno = NULLFSBLOCK;
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
@@ -1291,9 +1277,7 @@ xfs_bmap_read_extents(
 	ASSERT(level > 0);
 	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 	bno = be64_to_cpu(*pp);
-	ASSERT(bno != NULLFSBLOCK);
-	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
 	/*
 	 * Go down the tree until leaf level is reached, following the first
 	 * pointer (leftmost) at each level.
@@ -1864,6 +1848,7 @@ xfs_bmap_add_extent_delay_real(
 		 */
 		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
+		xfs_bmbt_set_state(ep, new->br_state);
 		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
 		(*nextents)++;
@@ -2202,6 +2187,7 @@ STATIC int				/* error */
 xfs_bmap_add_extent_unwritten_real(
 	struct xfs_trans	*tp,
 	xfs_inode_t		*ip,	/* incore inode pointer */
+	int			whichfork,
 	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
@@ -2221,12 +2207,14 @@ xfs_bmap_add_extent_unwritten_real(
 					/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
 	int			state = 0;/* state bits, accessed thru macros */
-	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_mount	*mp = ip->i_mount;
 
 	*logflagsp = 0;
 
 	cur = *curp;
-	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (whichfork == XFS_COW_FORK)
+		state |= BMAP_COWFORK;
 
 	ASSERT(*idx >= 0);
 	ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2285,7 +2273,7 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+	if (*idx < xfs_iext_count(ifp) - 1) {
 		state |= BMAP_RIGHT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
 		if (isnullstartblock(RIGHT.br_startblock))
@@ -2325,7 +2313,8 @@ xfs_bmap_add_extent_unwritten_real(
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
 		xfs_iext_remove(ip, *idx + 1, 2, state);
-		ip->i_d.di_nextents -= 2;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
@@ -2368,7 +2357,8 @@ xfs_bmap_add_extent_unwritten_real(
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
 		xfs_iext_remove(ip, *idx + 1, 1, state);
-		ip->i_d.di_nextents--;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
@@ -2403,7 +2393,8 @@ xfs_bmap_add_extent_unwritten_real(
 		xfs_bmbt_set_state(ep, newext);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		xfs_iext_remove(ip, *idx + 1, 1, state);
-		ip->i_d.di_nextents--;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
@@ -2515,7 +2506,8 @@ xfs_bmap_add_extent_unwritten_real(
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
 		xfs_iext_insert(ip, *idx, 1, new, state);
-		ip->i_d.di_nextents++;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
@@ -2593,7 +2585,8 @@ xfs_bmap_add_extent_unwritten_real(
 		++*idx;
 		xfs_iext_insert(ip, *idx, 1, new, state);
 
-		ip->i_d.di_nextents++;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
@@ -2641,7 +2634,8 @@ xfs_bmap_add_extent_unwritten_real(
 		++*idx;
 		xfs_iext_insert(ip, *idx, 2, &r[0], state);
 
-		ip->i_d.di_nextents += 2;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
@@ -2695,17 +2689,17 @@ xfs_bmap_add_extent_unwritten_real(
 	}
 
 	/* update reverse mappings */
-	error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+	error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
 	if (error)
 		goto done;
 
 	/* convert to a btree if necessary */
-	if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+	if (xfs_bmap_needs_btree(ip, whichfork)) {
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
 		error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
-				0, &tmp_logflags, XFS_DATA_FORK);
+				0, &tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		if (error)
 			goto done;
@@ -2717,7 +2711,7 @@
 		*curp = cur;
 	}
 
-	xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
 done:
 	*logflagsp |= rval;
 	return error;
@@ -2809,7 +2803,8 @@ xfs_bmap_add_extent_hole_delay(
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
-		newlen = xfs_bmap_worst_indlen(ip, temp);
+		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+					 oldlen);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
 			nullstartblock((int)newlen));
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2830,7 +2825,8 @@ xfs_bmap_add_extent_hole_delay(
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock);
-		newlen = xfs_bmap_worst_indlen(ip, temp);
+		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+					 oldlen);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
 			nullstartblock((int)newlen));
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2846,7 +2842,8 @@ xfs_bmap_add_extent_hole_delay(
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
-		newlen = xfs_bmap_worst_indlen(ip, temp);
+		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+					 oldlen);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
 			new->br_startoff,
 			nullstartblock((int)newlen), temp, right.br_state);
@@ -2899,13 +2896,14 @@ xfs_bmap_add_extent_hole_real(
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
-	ASSERT(whichfork != XFS_COW_FORK);
 
 	XFS_STATS_INC(mp, xs_add_exlist);
 
 	state = 0;
 	if (whichfork == XFS_ATTR_FORK)
 		state |= BMAP_ATTRFORK;
+	if (whichfork == XFS_COW_FORK)
+		state |= BMAP_COWFORK;
 
 	/*
 	 * Check and set flags if this segment has a left neighbor.
@@ -3822,17 +3820,13 @@ xfs_bmap_btalloc(
 	 * the first block that was allocated.
 	 */
 	ASSERT(*ap->firstblock == NULLFSBLOCK ||
-	       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
-	       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
-	       (ap->dfops->dop_low &&
-		XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
-		XFS_FSB_TO_AGNO(mp, args.fsbno)));
+	       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
+	       XFS_FSB_TO_AGNO(mp, args.fsbno));
 
 	ap->blkno = args.fsbno;
 	if (*ap->firstblock == NULLFSBLOCK)
 		*ap->firstblock = args.fsbno;
-	ASSERT(nullfb || fb_agno == args.agno ||
-	       (ap->dfops->dop_low && fb_agno < args.agno));
+	ASSERT(nullfb || fb_agno <= args.agno);
 	ap->length = args.len;
 	if (!(ap->flags & XFS_BMAPI_COWFORK))
 		ap->ip->i_d.di_nblocks += args.len;
@@ -4368,10 +4362,16 @@ xfs_bmapi_allocate(
 	bma->got.br_state = XFS_EXT_NORM;
 
 	/*
-	 * A wasdelay extent has been initialized, so shouldn't be flagged
-	 * as unwritten.
+	 * In the data fork, a wasdelay extent has been initialized, so
+	 * shouldn't be flagged as unwritten.
+	 *
+	 * For the cow fork, however, we convert delalloc reservations
+	 * (extents allocated for speculative preallocation) to
+	 * allocated unwritten extents, and only convert the unwritten
+	 * extents to real extents when we're about to write the data.
 	 */
-	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+	if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
+	    (bma->flags & XFS_BMAPI_PREALLOC) &&
 	    xfs_sb_version_hasextflgbit(&mp->m_sb))
 		bma->got.br_state = XFS_EXT_UNWRITTEN;
 
@@ -4422,8 +4422,6 @@ xfs_bmapi_convert_unwritten(
 	    (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
 		return 0;
 
-	ASSERT(whichfork != XFS_COW_FORK);
-
 	/*
 	 * Modify (by adding) the state flag, if writing.
 	 */
@@ -4448,8 +4446,8 @@ xfs_bmapi_convert_unwritten(
 			return error;
 	}
 
-	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
-			&bma->cur, mval, bma->firstblock, bma->dfops,
+	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
+			&bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
 			&tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
@@ -4458,8 +4456,12 @@ xfs_bmapi_convert_unwritten(
 	 * in the transaction for the sake of fsync(), even if nothing has
 	 * changed, because fsync() will not force the log for this transaction
 	 * unless it sees the inode pinned.
+	 *
+	 * Note: If we're only converting cow fork extents, there aren't
+	 * any on-disk updates to make, so we don't need to log anything.
 	 */
-	bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+	if (whichfork != XFS_COW_FORK)
+		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
 	if (error)
 		return error;
 
@@ -4533,15 +4535,15 @@ xfs_bmapi_write(
 	ASSERT(*nmap >= 1);
 	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
 	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
-	ASSERT(tp != NULL);
+	ASSERT(tp != NULL ||
+	       (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
+			(XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
 	ASSERT(len > 0);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
 	ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
 	ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
-	ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
-	ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
 
 	/* zeroing is for currently only for data extents, not metadata */
 	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4746,13 +4748,9 @@ error0:
 	if (bma.cur) {
 		if (!error) {
 			ASSERT(*firstblock == NULLFSBLOCK ||
-			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
+			       XFS_FSB_TO_AGNO(mp, *firstblock) <=
 			       XFS_FSB_TO_AGNO(mp,
-				       bma.cur->bc_private.b.firstblock) ||
-			       (dfops->dop_low &&
-				XFS_FSB_TO_AGNO(mp, *firstblock) <
-				XFS_FSB_TO_AGNO(mp,
-					bma.cur->bc_private.b.firstblock)));
+				       bma.cur->bc_private.b.firstblock));
 			*firstblock = bma.cur->bc_private.b.firstblock;
 		}
 		xfs_btree_del_cursor(bma.cur,
@@ -4787,34 +4785,59 @@ xfs_bmap_split_indlen(
 	xfs_filblks_t			len2 = *indlen2;
 	xfs_filblks_t			nres = len1 + len2; /* new total res. */
 	xfs_filblks_t			stolen = 0;
+	xfs_filblks_t			resfactor;
 
 	/*
 	 * Steal as many blocks as we can to try and satisfy the worst case
 	 * indlen for both new extents.
 	 */
-	while (nres > ores && avail) {
-		nres--;
-		avail--;
-		stolen++;
-	}
+	if (ores < nres && avail)
+		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
+	ores += stolen;
+
+	/* nothing else to do if we've satisfied the new reservation */
+	if (ores >= nres)
+		return stolen;
+
+	/*
+	 * We can't meet the total required reservation for the two extents.
+	 * Calculate the percent of the overall shortage between both extents
+	 * and apply this percentage to each of the requested indlen values.
+	 * This distributes the shortage fairly and reduces the chances that one
+	 * of the two extents is left with nothing when extents are repeatedly
+	 * split.
+	 */
+	resfactor = (ores * 100);
+	do_div(resfactor, nres);
+	len1 *= resfactor;
+	do_div(len1, 100);
+	len2 *= resfactor;
+	do_div(len2, 100);
+	ASSERT(len1 + len2 <= ores);
+	ASSERT(len1 < *indlen1 && len2 < *indlen2);
 
 	/*
-	 * The only blocks available are those reserved for the original
-	 * extent and what we can steal from the extent being removed.
-	 * If this still isn't enough to satisfy the combined
-	 * requirements for the two new extents, skim blocks off of each
-	 * of the new reservations until they match what is available.
+	 * Hand out the remainder to each extent. If one of the two reservations
+	 * is zero, we want to make sure that one gets a block first. The loop
+	 * below starts with len1, so hand len2 a block right off the bat if it
+	 * is zero.
 	 */
-	while (nres > ores) {
-		if (len1) {
-			len1--;
-			nres--;
+	ores -= (len1 + len2);
+	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
+	if (ores && !len2 && *indlen2) {
+		len2++;
+		ores--;
+	}
+	while (ores) {
+		if (len1 < *indlen1) {
+			len1++;
+			ores--;
 		}
-		if (nres == ores)
+		if (!ores)
 			break;
-		if (len2) {
-			len2--;
-			nres--;
+		if (len2 < *indlen2) {
+			len2++;
+			ores--;
 		}
 	}
 
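The new arithmetic grants each half of a split delalloc extent a proportional share of the available indirect-block reservation, rather than skimming one block at a time, so repeated splits can no longer starve one side completely. A stand-alone re-implementation of the hunk above for experimentation (user-space types and plain division in place of do_div; a sketch, not the kernel function):

#include <assert.h>
#include <stdio.h>

typedef unsigned long long filblks_t;

#define FILBLKS_MIN(a, b)	((a) < (b) ? (a) : (b))

static filblks_t
split_indlen(filblks_t ores, filblks_t *indlen1, filblks_t *indlen2,
	     filblks_t avail)
{
	filblks_t len1 = *indlen1, len2 = *indlen2;
	filblks_t nres = len1 + len2;
	filblks_t stolen = 0;

	if (ores < nres && avail)
		stolen = FILBLKS_MIN(nres - ores, avail);
	ores += stolen;
	if (ores >= nres)
		return stolen;

	/* scale both requests by the fraction we can actually cover */
	len1 = *indlen1 * (ores * 100 / nres) / 100;
	len2 = *indlen2 * (ores * 100 / nres) / 100;

	/* hand the rounding remainder out one block at a time */
	ores -= len1 + len2;
	if (ores && !len2 && *indlen2) {
		len2++;
		ores--;
	}
	while (ores) {
		if (len1 < *indlen1) {
			len1++;
			ores--;
		}
		if (!ores)
			break;
		if (len2 < *indlen2) {
			len2++;
			ores--;
		}
	}

	*indlen1 = len1;
	*indlen2 = len2;
	return stolen;
}

int main(void)
{
	filblks_t len1 = 8, len2 = 2;

	/* 5 reserved blocks plus 1 stealable against a 10-block request:
	 * a ~60% grant, i.e. len1 = 5, len2 = 1 */
	filblks_t stolen = split_indlen(5, &len1, &len2, 1);

	printf("stolen=%llu len1=%llu len2=%llu\n", stolen, len1, len2);
	assert(stolen == 1 && len1 + len2 == 6);
	return 0;
}
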
@@ -5556,8 +5579,8 @@ __xfs_bunmapi(
 			}
 			del.br_state = XFS_EXT_UNWRITTEN;
 			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-					&lastx, &cur, &del, firstblock, dfops,
-					&logflags);
+					whichfork, &lastx, &cur, &del,
+					firstblock, dfops, &logflags);
 			if (error)
 				goto error0;
 			goto nodelete;
@@ -5610,8 +5633,9 @@ __xfs_bunmapi(
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				lastx--;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
-						ip, &lastx, &cur, &prev,
-						firstblock, dfops, &logflags);
+						ip, whichfork, &lastx, &cur,
+						&prev, firstblock, dfops,
+						&logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5619,8 +5643,9 @@ __xfs_bunmapi(
 				ASSERT(del.br_state == XFS_EXT_NORM);
 				del.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
-						ip, &lastx, &cur, &del,
-						firstblock, dfops, &logflags);
+						ip, whichfork, &lastx, &cur,
+						&del, firstblock, dfops,
+						&logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d9be241fc86f..f93072b58a58 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c | |||
@@ -71,15 +71,9 @@ xfs_bmdr_to_bmbt( | |||
71 | xfs_bmbt_key_t *tkp; | 71 | xfs_bmbt_key_t *tkp; |
72 | __be64 *tpp; | 72 | __be64 *tpp; |
73 | 73 | ||
74 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 74 | xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, |
75 | xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, | 75 | XFS_BTNUM_BMAP, 0, 0, ip->i_ino, |
76 | XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, | ||
77 | XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); | ||
78 | else | ||
79 | xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, | ||
80 | XFS_BMAP_MAGIC, 0, 0, ip->i_ino, | ||
81 | XFS_BTREE_LONG_PTRS); | 76 | XFS_BTREE_LONG_PTRS); |
82 | |||
83 | rblock->bb_level = dblock->bb_level; | 77 | rblock->bb_level = dblock->bb_level; |
84 | ASSERT(be16_to_cpu(rblock->bb_level) > 0); | 78 | ASSERT(be16_to_cpu(rblock->bb_level) > 0); |
85 | rblock->bb_numrecs = dblock->bb_numrecs; | 79 | rblock->bb_numrecs = dblock->bb_numrecs; |
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 21e6a6ab6b9a..c3decedc9455 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -50,8 +50,18 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { | |||
50 | XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, | 50 | XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, |
51 | XFS_REFC_CRC_MAGIC } | 51 | XFS_REFC_CRC_MAGIC } |
52 | }; | 52 | }; |
53 | #define xfs_btree_magic(cur) \ | 53 | |
54 | xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] | 54 | __uint32_t |
55 | xfs_btree_magic( | ||
56 | int crc, | ||
57 | xfs_btnum_t btnum) | ||
58 | { | ||
59 | __uint32_t magic = xfs_magics[crc][btnum]; | ||
60 | |||
61 | /* Ensure we asked for crc for crc-only magics. */ | ||
62 | ASSERT(magic != 0); | ||
63 | return magic; | ||
64 | } | ||
55 | 65 | ||
56 | STATIC int /* error (0 or EFSCORRUPTED) */ | 66 | STATIC int /* error (0 or EFSCORRUPTED) */ |
57 | xfs_btree_check_lblock( | 67 | xfs_btree_check_lblock( |
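xfs_btree_magic() above turns the old macro into a function that indexes a [crc][btnum] table and asserts that a CRC-only btree type is never requested on a non-CRC filesystem. A compilable sketch of that shape (the enum and magic constants below are illustrative stand-ins for the real xfs_magics table):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for xfs_btnum_t and the on-disk magics. */
enum btnum { BT_BNO, BT_CNT, BT_RMAP, BT_MAX };

static const uint32_t magics[2][BT_MAX] = {
	/* !crc row: the rmap btree exists only on crc filesystems, so 0 */
	{ 0x41425442 /* "ABTB" */, 0x41425443 /* "ABTC" */, 0 },
	/* crc row */
	{ 0x41423342 /* "AB3B" */, 0x41423343 /* "AB3C" */,
	  0x524d4233 /* "RMB3" */ },
};

static uint32_t btree_magic(int crc, enum btnum btnum)
{
	uint32_t magic = magics[crc][btnum];

	/* Ensure we asked for crc for crc-only magics. */
	assert(magic != 0);
	return magic;
}

int main(void)
{
	printf("0x%08x\n", (unsigned)btree_magic(1, BT_RMAP));	/* ok */
	printf("0x%08x\n", (unsigned)btree_magic(0, BT_BNO));	/* ok */
	/* btree_magic(0, BT_RMAP) would trip the assert. */
	return 0;
}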
@@ -62,10 +72,13 @@ xfs_btree_check_lblock( | |||
62 | { | 72 | { |
63 | int lblock_ok = 1; /* block passes checks */ | 73 | int lblock_ok = 1; /* block passes checks */ |
64 | struct xfs_mount *mp; /* file system mount point */ | 74 | struct xfs_mount *mp; /* file system mount point */ |
75 | xfs_btnum_t btnum = cur->bc_btnum; | ||
76 | int crc; | ||
65 | 77 | ||
66 | mp = cur->bc_mp; | 78 | mp = cur->bc_mp; |
79 | crc = xfs_sb_version_hascrc(&mp->m_sb); | ||
67 | 80 | ||
68 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | 81 | if (crc) { |
69 | lblock_ok = lblock_ok && | 82 | lblock_ok = lblock_ok && |
70 | uuid_equal(&block->bb_u.l.bb_uuid, | 83 | uuid_equal(&block->bb_u.l.bb_uuid, |
71 | &mp->m_sb.sb_meta_uuid) && | 84 | &mp->m_sb.sb_meta_uuid) && |
@@ -74,7 +87,7 @@ xfs_btree_check_lblock( | |||
74 | } | 87 | } |
75 | 88 | ||
76 | lblock_ok = lblock_ok && | 89 | lblock_ok = lblock_ok && |
77 | be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && | 90 | be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && |
78 | be16_to_cpu(block->bb_level) == level && | 91 | be16_to_cpu(block->bb_level) == level && |
79 | be16_to_cpu(block->bb_numrecs) <= | 92 | be16_to_cpu(block->bb_numrecs) <= |
80 | cur->bc_ops->get_maxrecs(cur, level) && | 93 | cur->bc_ops->get_maxrecs(cur, level) && |
@@ -110,13 +123,16 @@ xfs_btree_check_sblock( | |||
110 | struct xfs_agf *agf; /* ag. freespace structure */ | 123 | struct xfs_agf *agf; /* ag. freespace structure */ |
111 | xfs_agblock_t agflen; /* native ag. freespace length */ | 124 | xfs_agblock_t agflen; /* native ag. freespace length */ |
112 | int sblock_ok = 1; /* block passes checks */ | 125 | int sblock_ok = 1; /* block passes checks */ |
126 | xfs_btnum_t btnum = cur->bc_btnum; | ||
127 | int crc; | ||
113 | 128 | ||
114 | mp = cur->bc_mp; | 129 | mp = cur->bc_mp; |
130 | crc = xfs_sb_version_hascrc(&mp->m_sb); | ||
115 | agbp = cur->bc_private.a.agbp; | 131 | agbp = cur->bc_private.a.agbp; |
116 | agf = XFS_BUF_TO_AGF(agbp); | 132 | agf = XFS_BUF_TO_AGF(agbp); |
117 | agflen = be32_to_cpu(agf->agf_length); | 133 | agflen = be32_to_cpu(agf->agf_length); |
118 | 134 | ||
119 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | 135 | if (crc) { |
120 | sblock_ok = sblock_ok && | 136 | sblock_ok = sblock_ok && |
121 | uuid_equal(&block->bb_u.s.bb_uuid, | 137 | uuid_equal(&block->bb_u.s.bb_uuid, |
122 | &mp->m_sb.sb_meta_uuid) && | 138 | &mp->m_sb.sb_meta_uuid) && |
@@ -125,7 +141,7 @@ xfs_btree_check_sblock( | |||
125 | } | 141 | } |
126 | 142 | ||
127 | sblock_ok = sblock_ok && | 143 | sblock_ok = sblock_ok && |
128 | be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && | 144 | be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && |
129 | be16_to_cpu(block->bb_level) == level && | 145 | be16_to_cpu(block->bb_level) == level && |
130 | be16_to_cpu(block->bb_numrecs) <= | 146 | be16_to_cpu(block->bb_numrecs) <= |
131 | cur->bc_ops->get_maxrecs(cur, level) && | 147 | cur->bc_ops->get_maxrecs(cur, level) && |
@@ -810,7 +826,8 @@ xfs_btree_read_bufl( | |||
810 | xfs_daddr_t d; /* real disk block address */ | 826 | xfs_daddr_t d; /* real disk block address */ |
811 | int error; | 827 | int error; |
812 | 828 | ||
813 | ASSERT(fsbno != NULLFSBLOCK); | 829 | if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) |
830 | return -EFSCORRUPTED; | ||
814 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 831 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
815 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | 832 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, |
816 | mp->m_bsize, lock, &bp, ops); | 833 | mp->m_bsize, lock, &bp, ops); |
@@ -1084,12 +1101,15 @@ xfs_btree_init_block_int( | |||
1084 | struct xfs_mount *mp, | 1101 | struct xfs_mount *mp, |
1085 | struct xfs_btree_block *buf, | 1102 | struct xfs_btree_block *buf, |
1086 | xfs_daddr_t blkno, | 1103 | xfs_daddr_t blkno, |
1087 | __u32 magic, | 1104 | xfs_btnum_t btnum, |
1088 | __u16 level, | 1105 | __u16 level, |
1089 | __u16 numrecs, | 1106 | __u16 numrecs, |
1090 | __u64 owner, | 1107 | __u64 owner, |
1091 | unsigned int flags) | 1108 | unsigned int flags) |
1092 | { | 1109 | { |
1110 | int crc = xfs_sb_version_hascrc(&mp->m_sb); | ||
1111 | __u32 magic = xfs_btree_magic(crc, btnum); | ||
1112 | |||
1093 | buf->bb_magic = cpu_to_be32(magic); | 1113 | buf->bb_magic = cpu_to_be32(magic); |
1094 | buf->bb_level = cpu_to_be16(level); | 1114 | buf->bb_level = cpu_to_be16(level); |
1095 | buf->bb_numrecs = cpu_to_be16(numrecs); | 1115 | buf->bb_numrecs = cpu_to_be16(numrecs); |
@@ -1097,7 +1117,7 @@ xfs_btree_init_block_int( | |||
1097 | if (flags & XFS_BTREE_LONG_PTRS) { | 1117 | if (flags & XFS_BTREE_LONG_PTRS) { |
1098 | buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); | 1118 | buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); |
1099 | buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); | 1119 | buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); |
1100 | if (flags & XFS_BTREE_CRC_BLOCKS) { | 1120 | if (crc) { |
1101 | buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); | 1121 | buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); |
1102 | buf->bb_u.l.bb_owner = cpu_to_be64(owner); | 1122 | buf->bb_u.l.bb_owner = cpu_to_be64(owner); |
1103 | uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); | 1123 | uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); |
@@ -1110,7 +1130,7 @@ xfs_btree_init_block_int( | |||
1110 | 1130 | ||
1111 | buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); | 1131 | buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); |
1112 | buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); | 1132 | buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); |
1113 | if (flags & XFS_BTREE_CRC_BLOCKS) { | 1133 | if (crc) { |
1114 | buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); | 1134 | buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); |
1115 | buf->bb_u.s.bb_owner = cpu_to_be32(__owner); | 1135 | buf->bb_u.s.bb_owner = cpu_to_be32(__owner); |
1116 | uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); | 1136 | uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); |
@@ -1123,14 +1143,14 @@ void | |||
1123 | xfs_btree_init_block( | 1143 | xfs_btree_init_block( |
1124 | struct xfs_mount *mp, | 1144 | struct xfs_mount *mp, |
1125 | struct xfs_buf *bp, | 1145 | struct xfs_buf *bp, |
1126 | __u32 magic, | 1146 | xfs_btnum_t btnum, |
1127 | __u16 level, | 1147 | __u16 level, |
1128 | __u16 numrecs, | 1148 | __u16 numrecs, |
1129 | __u64 owner, | 1149 | __u64 owner, |
1130 | unsigned int flags) | 1150 | unsigned int flags) |
1131 | { | 1151 | { |
1132 | xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, | 1152 | xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, |
1133 | magic, level, numrecs, owner, flags); | 1153 | btnum, level, numrecs, owner, flags); |
1134 | } | 1154 | } |
1135 | 1155 | ||
1136 | STATIC void | 1156 | STATIC void |
@@ -1140,7 +1160,7 @@ xfs_btree_init_block_cur( | |||
1140 | int level, | 1160 | int level, |
1141 | int numrecs) | 1161 | int numrecs) |
1142 | { | 1162 | { |
1143 | __u64 owner; | 1163 | __u64 owner; |
1144 | 1164 | ||
1145 | /* | 1165 | /* |
1146 | * we can pull the owner from the cursor right now as the different | 1166 | * we can pull the owner from the cursor right now as the different |
@@ -1154,7 +1174,7 @@ xfs_btree_init_block_cur( | |||
1154 | owner = cur->bc_private.a.agno; | 1174 | owner = cur->bc_private.a.agno; |
1155 | 1175 | ||
1156 | xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, | 1176 | xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, |
1157 | xfs_btree_magic(cur), level, numrecs, | 1177 | cur->bc_btnum, level, numrecs, |
1158 | owner, cur->bc_flags); | 1178 | owner, cur->bc_flags); |
1159 | } | 1179 | } |
1160 | 1180 | ||
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b69b947c4c1b..4bb62580a7fd 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h | |||
@@ -76,6 +76,8 @@ union xfs_btree_rec { | |||
76 | #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) | 76 | #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) |
77 | #define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) | 77 | #define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) |
78 | 78 | ||
79 | __uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); | ||
80 | |||
79 | /* | 81 | /* |
80 | * For logging record fields. | 82 | * For logging record fields. |
81 | */ | 83 | */ |
@@ -378,7 +380,7 @@ void | |||
378 | xfs_btree_init_block( | 380 | xfs_btree_init_block( |
379 | struct xfs_mount *mp, | 381 | struct xfs_mount *mp, |
380 | struct xfs_buf *bp, | 382 | struct xfs_buf *bp, |
381 | __u32 magic, | 383 | xfs_btnum_t btnum, |
382 | __u16 level, | 384 | __u16 level, |
383 | __u16 numrecs, | 385 | __u16 numrecs, |
384 | __u64 owner, | 386 | __u64 owner, |
@@ -389,7 +391,7 @@ xfs_btree_init_block_int( | |||
389 | struct xfs_mount *mp, | 391 | struct xfs_mount *mp, |
390 | struct xfs_btree_block *buf, | 392 | struct xfs_btree_block *buf, |
391 | xfs_daddr_t blkno, | 393 | xfs_daddr_t blkno, |
392 | __u32 magic, | 394 | xfs_btnum_t btnum, |
393 | __u16 level, | 395 | __u16 level, |
394 | __u16 numrecs, | 396 | __u16 numrecs, |
395 | __u64 owner, | 397 | __u64 owner, |
@@ -456,7 +458,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) | |||
456 | #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) | 458 | #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) |
457 | 459 | ||
458 | #define XFS_FSB_SANITY_CHECK(mp,fsb) \ | 460 | #define XFS_FSB_SANITY_CHECK(mp,fsb) \ |
459 | (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ | 461 | (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ |
460 | XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) | 462 | XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) |
461 | 463 | ||
462 | /* | 464 | /* |
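Together, the xfs_btree_read_bufl() and XFS_FSB_SANITY_CHECK() hunks turn a bad sibling pointer (including the newly rejected block zero) into -EFSCORRUPTED instead of a debug-only ASSERT. A userspace sketch of the strengthened check, with hypothetical geometry fields in place of the superblock:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical mount geometry; the real code reads the superblock. */
struct geom {
	uint32_t agcount;	/* number of allocation groups */
	uint32_t agblocks;	/* blocks per allocation group */
	int	 agblklog;	/* log2 of agblocks, rounded up */
};

static bool fsb_sanity_check(const struct geom *g, uint64_t fsb)
{
	uint32_t agno = fsb >> g->agblklog;		   /* FSB_TO_AGNO */
	uint32_t agbno = fsb & ((1u << g->agblklog) - 1);  /* FSB_TO_AGBNO */

	/* "fsb != 0" is the new part of the check. */
	return fsb != 0 && agno < g->agcount && agbno < g->agblocks;
}

int main(void)
{
	struct geom g = { .agcount = 4, .agblocks = 1000, .agblklog = 10 };

	printf("%d\n", fsb_sanity_check(&g, 0));		  /* 0 */
	printf("%d\n", fsb_sanity_check(&g, (3ull << 10) | 999)); /* 1 */
	printf("%d\n", fsb_sanity_check(&g, (4ull << 10) | 1));   /* 0 */
	return 0;
}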
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index f2dc1a950c85..1bdf2888295b 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c | |||
@@ -2633,7 +2633,7 @@ out_free: | |||
2633 | /* | 2633 | /* |
2634 | * Readahead the dir/attr block. | 2634 | * Readahead the dir/attr block. |
2635 | */ | 2635 | */ |
2636 | xfs_daddr_t | 2636 | int |
2637 | xfs_da_reada_buf( | 2637 | xfs_da_reada_buf( |
2638 | struct xfs_inode *dp, | 2638 | struct xfs_inode *dp, |
2639 | xfs_dablk_t bno, | 2639 | xfs_dablk_t bno, |
@@ -2664,7 +2664,5 @@ out_free: | |||
2664 | if (mapp != &map) | 2664 | if (mapp != &map) |
2665 | kmem_free(mapp); | 2665 | kmem_free(mapp); |
2666 | 2666 | ||
2667 | if (error) | 2667 | return error; |
2668 | return -1; | ||
2669 | return mappedbno; | ||
2670 | } | 2668 | } |
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 98c75cbe6ac2..4e29cb6a3627 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h | |||
@@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, | |||
201 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | 201 | xfs_dablk_t bno, xfs_daddr_t mappedbno, |
202 | struct xfs_buf **bpp, int whichfork, | 202 | struct xfs_buf **bpp, int whichfork, |
203 | const struct xfs_buf_ops *ops); | 203 | const struct xfs_buf_ops *ops); |
204 | xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, | 204 | int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, |
205 | xfs_daddr_t mapped_bno, int whichfork, | 205 | xfs_daddr_t mapped_bno, int whichfork, |
206 | const struct xfs_buf_ops *ops); | 206 | const struct xfs_buf_ops *ops); |
207 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, | 207 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, |
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 75a557432d0f..bbd1238852b3 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c | |||
@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { | |||
155 | .verify_write = xfs_dir3_free_write_verify, | 155 | .verify_write = xfs_dir3_free_write_verify, |
156 | }; | 156 | }; |
157 | 157 | ||
158 | /* Everything ok in the free block header? */ | ||
159 | static bool | ||
160 | xfs_dir3_free_header_check( | ||
161 | struct xfs_inode *dp, | ||
162 | xfs_dablk_t fbno, | ||
163 | struct xfs_buf *bp) | ||
164 | { | ||
165 | struct xfs_mount *mp = dp->i_mount; | ||
166 | unsigned int firstdb; | ||
167 | int maxbests; | ||
168 | |||
169 | maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); | ||
170 | firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - | ||
171 | xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * | ||
172 | maxbests; | ||
173 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
174 | struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; | ||
175 | |||
176 | if (be32_to_cpu(hdr3->firstdb) != firstdb) | ||
177 | return false; | ||
178 | if (be32_to_cpu(hdr3->nvalid) > maxbests) | ||
179 | return false; | ||
180 | if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) | ||
181 | return false; | ||
182 | } else { | ||
183 | struct xfs_dir2_free_hdr *hdr = bp->b_addr; | ||
184 | |||
185 | if (be32_to_cpu(hdr->firstdb) != firstdb) | ||
186 | return false; | ||
187 | if (be32_to_cpu(hdr->nvalid) > maxbests) | ||
188 | return false; | ||
189 | if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) | ||
190 | return false; | ||
191 | } | ||
192 | return true; | ||
193 | } | ||
158 | 194 | ||
159 | static int | 195 | static int |
160 | __xfs_dir3_free_read( | 196 | __xfs_dir3_free_read( |
@@ -168,11 +204,22 @@ __xfs_dir3_free_read( | |||
168 | 204 | ||
169 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | 205 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, |
170 | XFS_DATA_FORK, &xfs_dir3_free_buf_ops); | 206 | XFS_DATA_FORK, &xfs_dir3_free_buf_ops); |
207 | if (err || !*bpp) | ||
208 | return err; | ||
209 | |||
210 | /* Check things that we can't do in the verifier. */ | ||
211 | if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { | ||
212 | xfs_buf_ioerror(*bpp, -EFSCORRUPTED); | ||
213 | xfs_verifier_error(*bpp); | ||
214 | xfs_trans_brelse(tp, *bpp); | ||
215 | return -EFSCORRUPTED; | ||
216 | } | ||
171 | 217 | ||
172 | /* try read returns without an error or *bpp if it lands in a hole */ | 218 | /* try read returns without an error or *bpp if it lands in a hole */ |
173 | if (!err && tp && *bpp) | 219 | if (tp) |
174 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); | 220 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); |
175 | return err; | 221 | |
222 | return 0; | ||
176 | } | 223 | } |
177 | 224 | ||
178 | int | 225 | int |
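The new xfs_dir3_free_header_check() validates, after the buffer verifier runs, that a free block's header matches its position in the directory: firstdb must equal the block's index times the per-block bests count, and nused can never exceed nvalid. A standalone sketch of that invariant (flattened hypothetical header; the real code derives fb_index from the da block number and XFS_DIR2_FREE_OFFSET):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical flattened free-block header (cf. xfs_dir2_free_hdr). */
struct free_hdr {
	uint32_t firstdb;	/* first data block covered */
	uint32_t nvalid;	/* valid bests entries */
	uint32_t nused;		/* in-use bests entries */
};

static bool free_header_check(const struct free_hdr *hdr,
			      unsigned int fb_index, unsigned int maxbests)
{
	unsigned int firstdb = fb_index * maxbests;

	if (hdr->firstdb != firstdb)
		return false;		/* block is at the wrong offset */
	if (hdr->nvalid > maxbests)
		return false;		/* claims more entries than fit */
	if (hdr->nvalid < hdr->nused)
		return false;		/* used count exceeds valid count */
	return true;
}

int main(void)
{
	struct free_hdr ok  = { .firstdb = 120, .nvalid = 50, .nused = 40 };
	struct free_hdr bad = { .firstdb = 120, .nvalid = 30, .nused = 40 };

	printf("%d %d\n", free_header_check(&ok, 2, 60),
			  free_header_check(&bad, 2, 60));
	return 0;
}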
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f272abff11e1..d41ade5d293e 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c | |||
@@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment( | |||
51 | struct xfs_mount *mp) | 51 | struct xfs_mount *mp) |
52 | { | 52 | { |
53 | if (xfs_sb_version_hasalign(&mp->m_sb) && | 53 | if (xfs_sb_version_hasalign(&mp->m_sb) && |
54 | mp->m_sb.sb_inoalignmt >= | 54 | mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) |
55 | XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) | ||
56 | return mp->m_sb.sb_inoalignmt; | 55 | return mp->m_sb.sb_inoalignmt; |
57 | return 1; | 56 | return 1; |
58 | } | 57 | } |
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 222e103356c6..25c1e078aef6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
27 | #include "xfs_trans.h" | 27 | #include "xfs_trans.h" |
28 | #include "xfs_inode_item.h" | 28 | #include "xfs_inode_item.h" |
29 | #include "xfs_btree.h" | ||
29 | #include "xfs_bmap_btree.h" | 30 | #include "xfs_bmap_btree.h" |
30 | #include "xfs_bmap.h" | 31 | #include "xfs_bmap.h" |
31 | #include "xfs_error.h" | 32 | #include "xfs_error.h" |
@@ -429,11 +430,13 @@ xfs_iformat_btree( | |||
429 | /* REFERENCED */ | 430 | /* REFERENCED */ |
430 | int nrecs; | 431 | int nrecs; |
431 | int size; | 432 | int size; |
433 | int level; | ||
432 | 434 | ||
433 | ifp = XFS_IFORK_PTR(ip, whichfork); | 435 | ifp = XFS_IFORK_PTR(ip, whichfork); |
434 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); | 436 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); |
435 | size = XFS_BMAP_BROOT_SPACE(mp, dfp); | 437 | size = XFS_BMAP_BROOT_SPACE(mp, dfp); |
436 | nrecs = be16_to_cpu(dfp->bb_numrecs); | 438 | nrecs = be16_to_cpu(dfp->bb_numrecs); |
439 | level = be16_to_cpu(dfp->bb_level); | ||
437 | 440 | ||
438 | /* | 441 | /* |
439 | * blow out if -- fork has less extents than can fit in | 442 | * blow out if -- fork has less extents than can fit in |
@@ -446,7 +449,8 @@ xfs_iformat_btree( | |||
446 | XFS_IFORK_MAXEXT(ip, whichfork) || | 449 | XFS_IFORK_MAXEXT(ip, whichfork) || |
447 | XFS_BMDR_SPACE_CALC(nrecs) > | 450 | XFS_BMDR_SPACE_CALC(nrecs) > |
448 | XFS_DFORK_SIZE(dip, mp, whichfork) || | 451 | XFS_DFORK_SIZE(dip, mp, whichfork) || |
449 | XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | 452 | XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || |
453 | level == 0 || level > XFS_BTREE_MAXLEVELS) { | ||
450 | xfs_warn(mp, "corrupt inode %Lu (btree).", | 454 | xfs_warn(mp, "corrupt inode %Lu (btree).", |
451 | (unsigned long long) ip->i_ino); | 455 | (unsigned long long) ip->i_ino); |
452 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | 456 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, |
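The extra condition in xfs_iformat_btree() rejects an on-disk root whose level is zero or above XFS_BTREE_MAXLEVELS before the kernel ever walks it. A trivial model of the bounds test (the MAXLEVELS value here is illustrative; a btree-format root always has at least one level, since a zero-level fork would be extents format):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BTREE_MAXLEVELS 9	/* stand-in for XFS_BTREE_MAXLEVELS */

static bool root_level_ok(uint16_t level)
{
	return level != 0 && level <= BTREE_MAXLEVELS;
}

int main(void)
{
	printf("%d %d %d\n", root_level_ok(0),	/* corrupt */
			     root_level_ok(3),	/* fine */
			     root_level_ok(40));/* corrupt */
	return 0;
}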
@@ -497,15 +501,14 @@ xfs_iread_extents( | |||
497 | * We know that the size is valid (it's checked in iformat_btree) | 501 | * We know that the size is valid (it's checked in iformat_btree) |
498 | */ | 502 | */ |
499 | ifp->if_bytes = ifp->if_real_bytes = 0; | 503 | ifp->if_bytes = ifp->if_real_bytes = 0; |
500 | ifp->if_flags |= XFS_IFEXTENTS; | ||
501 | xfs_iext_add(ifp, 0, nextents); | 504 | xfs_iext_add(ifp, 0, nextents); |
502 | error = xfs_bmap_read_extents(tp, ip, whichfork); | 505 | error = xfs_bmap_read_extents(tp, ip, whichfork); |
503 | if (error) { | 506 | if (error) { |
504 | xfs_iext_destroy(ifp); | 507 | xfs_iext_destroy(ifp); |
505 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
506 | return error; | 508 | return error; |
507 | } | 509 | } |
508 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); | 510 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); |
511 | ifp->if_flags |= XFS_IFEXTENTS; | ||
509 | return 0; | 512 | return 0; |
510 | } | 513 | } |
511 | /* | 514 | /* |
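The xfs_iread_extents() hunk moves the XFS_IFEXTENTS flag set from before the read to after it succeeds, so a failed read can no longer leave the fork flagged as loaded while actually empty. A small model of the ordering fix (hypothetical structure and flag):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical in-core fork: extents plus a "loaded" flag. */
struct fork {
	bool	loaded;		/* cf. XFS_IFEXTENTS */
	int	nextents;
};

static int read_extents_from_disk(struct fork *f)
{
	(void)f;
	return -EIO;		/* pretend the media read failed */
}

/*
 * Set the loaded flag only after the read succeeds; setting it first,
 * as the old code did, meant an error path had to remember to clear it.
 */
static int load_fork(struct fork *f)
{
	int error = read_extents_from_disk(f);

	if (error)
		return error;	/* flag stays clear on failure */
	f->loaded = true;
	return 0;
}

int main(void)
{
	struct fork f = { 0 };

	printf("error=%d loaded=%d\n", load_fork(&f), f.loaded);
	return 0;
}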
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index d9f65e2d5cc8..29a01ec89dd0 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h | |||
@@ -42,7 +42,6 @@ typedef struct xlog_recover_item { | |||
42 | xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ | 42 | xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ |
43 | } xlog_recover_item_t; | 43 | } xlog_recover_item_t; |
44 | 44 | ||
45 | struct xlog_tid; | ||
46 | typedef struct xlog_recover { | 45 | typedef struct xlog_recover { |
47 | struct hlist_node r_list; | 46 | struct hlist_node r_list; |
48 | xlog_tid_t r_log_tid; /* log's transaction id */ | 47 | xlog_tid_t r_log_tid; /* log's transaction id */ |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 631e7c0e0a29..1ff9df7a3ce8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -481,6 +481,12 @@ xfs_submit_ioend( | |||
481 | struct xfs_ioend *ioend, | 481 | struct xfs_ioend *ioend, |
482 | int status) | 482 | int status) |
483 | { | 483 | { |
484 | /* Convert CoW extents to regular */ | ||
485 | if (!status && ioend->io_type == XFS_IO_COW) { | ||
486 | status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), | ||
487 | ioend->io_offset, ioend->io_size); | ||
488 | } | ||
489 | |||
484 | /* Reserve log space if we might write beyond the on-disk inode size. */ | 490 | /* Reserve log space if we might write beyond the on-disk inode size. */ |
485 | if (!status && | 491 | if (!status && |
486 | ioend->io_type != XFS_IO_UNWRITTEN && | 492 | ioend->io_type != XFS_IO_UNWRITTEN && |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c1417919ab0a..8b75dcea5966 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -88,7 +88,6 @@ int | |||
88 | xfs_bmap_rtalloc( | 88 | xfs_bmap_rtalloc( |
89 | struct xfs_bmalloca *ap) /* bmap alloc argument struct */ | 89 | struct xfs_bmalloca *ap) /* bmap alloc argument struct */ |
90 | { | 90 | { |
91 | xfs_alloctype_t atype = 0; /* type for allocation routines */ | ||
92 | int error; /* error return value */ | 91 | int error; /* error return value */ |
93 | xfs_mount_t *mp; /* mount point structure */ | 92 | xfs_mount_t *mp; /* mount point structure */ |
94 | xfs_extlen_t prod = 0; /* product factor for allocators */ | 93 | xfs_extlen_t prod = 0; /* product factor for allocators */ |
@@ -155,18 +154,14 @@ xfs_bmap_rtalloc( | |||
155 | /* | 154 | /* |
156 | * Realtime allocation, done through xfs_rtallocate_extent. | 155 | * Realtime allocation, done through xfs_rtallocate_extent. |
157 | */ | 156 | */ |
158 | atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; | ||
159 | do_div(ap->blkno, mp->m_sb.sb_rextsize); | 157 | do_div(ap->blkno, mp->m_sb.sb_rextsize); |
160 | rtb = ap->blkno; | 158 | rtb = ap->blkno; |
161 | ap->length = ralen; | 159 | ap->length = ralen; |
162 | if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, | 160 | error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, |
163 | &ralen, atype, ap->wasdel, prod, &rtb))) | 161 | &ralen, ap->wasdel, prod, &rtb); |
164 | return error; | 162 | if (error) |
165 | if (rtb == NULLFSBLOCK && prod > 1 && | ||
166 | (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, | ||
167 | ap->length, &ralen, atype, | ||
168 | ap->wasdel, 1, &rtb))) | ||
169 | return error; | 163 | return error; |
164 | |||
170 | ap->blkno = rtb; | 165 | ap->blkno = rtb; |
171 | if (ap->blkno != NULLFSBLOCK) { | 166 | if (ap->blkno != NULLFSBLOCK) { |
172 | ap->blkno *= mp->m_sb.sb_rextsize; | 167 | ap->blkno *= mp->m_sb.sb_rextsize; |
@@ -787,11 +782,9 @@ xfs_getbmap( | |||
787 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 782 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
788 | 783 | ||
789 | for (i = 0; i < cur_ext; i++) { | 784 | for (i = 0; i < cur_ext; i++) { |
790 | int full = 0; /* user array is full */ | ||
791 | |||
792 | /* format results & advance arg */ | 785 | /* format results & advance arg */ |
793 | error = formatter(&arg, &out[i], &full); | 786 | error = formatter(&arg, &out[i]); |
794 | if (error || full) | 787 | if (error) |
795 | break; | 788 | break; |
796 | } | 789 | } |
797 | 790 | ||
@@ -917,17 +910,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) | |||
917 | */ | 910 | */ |
918 | int | 911 | int |
919 | xfs_free_eofblocks( | 912 | xfs_free_eofblocks( |
920 | xfs_mount_t *mp, | 913 | struct xfs_inode *ip) |
921 | xfs_inode_t *ip, | ||
922 | bool need_iolock) | ||
923 | { | 914 | { |
924 | xfs_trans_t *tp; | 915 | struct xfs_trans *tp; |
925 | int error; | 916 | int error; |
926 | xfs_fileoff_t end_fsb; | 917 | xfs_fileoff_t end_fsb; |
927 | xfs_fileoff_t last_fsb; | 918 | xfs_fileoff_t last_fsb; |
928 | xfs_filblks_t map_len; | 919 | xfs_filblks_t map_len; |
929 | int nimaps; | 920 | int nimaps; |
930 | xfs_bmbt_irec_t imap; | 921 | struct xfs_bmbt_irec imap; |
922 | struct xfs_mount *mp = ip->i_mount; | ||
923 | |||
924 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
931 | 925 | ||
932 | /* | 926 | /* |
933 | * Figure out if there are any blocks beyond the end | 927 | * Figure out if there are any blocks beyond the end |
@@ -944,6 +938,10 @@ xfs_free_eofblocks( | |||
944 | error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); | 938 | error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); |
945 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 939 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
946 | 940 | ||
941 | /* | ||
942 | * If there are blocks after the end of file, truncate the file to its | ||
943 | * current size to free them up. | ||
944 | */ | ||
947 | if (!error && (nimaps != 0) && | 945 | if (!error && (nimaps != 0) && |
948 | (imap.br_startblock != HOLESTARTBLOCK || | 946 | (imap.br_startblock != HOLESTARTBLOCK || |
949 | ip->i_delayed_blks)) { | 947 | ip->i_delayed_blks)) { |
@@ -954,22 +952,13 @@ xfs_free_eofblocks( | |||
954 | if (error) | 952 | if (error) |
955 | return error; | 953 | return error; |
956 | 954 | ||
957 | /* | 955 | /* wait on dio to ensure i_size has settled */ |
958 | * There are blocks after the end of file. | 956 | inode_dio_wait(VFS_I(ip)); |
959 | * Free them up now by truncating the file to | ||
960 | * its current size. | ||
961 | */ | ||
962 | if (need_iolock) { | ||
963 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) | ||
964 | return -EAGAIN; | ||
965 | } | ||
966 | 957 | ||
967 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, | 958 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, |
968 | &tp); | 959 | &tp); |
969 | if (error) { | 960 | if (error) { |
970 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | 961 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); |
971 | if (need_iolock) | ||
972 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
973 | return error; | 962 | return error; |
974 | } | 963 | } |
975 | 964 | ||
@@ -997,8 +986,6 @@ xfs_free_eofblocks( | |||
997 | } | 986 | } |
998 | 987 | ||
999 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 988 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1000 | if (need_iolock) | ||
1001 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1002 | } | 989 | } |
1003 | return error; | 990 | return error; |
1004 | } | 991 | } |
@@ -1393,10 +1380,16 @@ xfs_shift_file_space( | |||
1393 | xfs_fileoff_t stop_fsb; | 1380 | xfs_fileoff_t stop_fsb; |
1394 | xfs_fileoff_t next_fsb; | 1381 | xfs_fileoff_t next_fsb; |
1395 | xfs_fileoff_t shift_fsb; | 1382 | xfs_fileoff_t shift_fsb; |
1383 | uint resblks; | ||
1396 | 1384 | ||
1397 | ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); | 1385 | ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); |
1398 | 1386 | ||
1399 | if (direction == SHIFT_LEFT) { | 1387 | if (direction == SHIFT_LEFT) { |
1388 | /* | ||
1389 | * Reserve blocks to cover potential extent merges after left | ||
1390 | * shift operations. | ||
1391 | */ | ||
1392 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); | ||
1400 | next_fsb = XFS_B_TO_FSB(mp, offset + len); | 1393 | next_fsb = XFS_B_TO_FSB(mp, offset + len); |
1401 | stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); | 1394 | stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); |
1402 | } else { | 1395 | } else { |
@@ -1404,6 +1397,7 @@ xfs_shift_file_space( | |||
1404 | * If right shift, delegate the work of initialization of | 1397 | * If right shift, delegate the work of initialization of |
1405 | * next_fsb to xfs_bmap_shift_extent as it has ilock held. | 1398 | * next_fsb to xfs_bmap_shift_extent as it has ilock held. |
1406 | */ | 1399 | */ |
1400 | resblks = 0; | ||
1407 | next_fsb = NULLFSBLOCK; | 1401 | next_fsb = NULLFSBLOCK; |
1408 | stop_fsb = XFS_B_TO_FSB(mp, offset); | 1402 | stop_fsb = XFS_B_TO_FSB(mp, offset); |
1409 | } | 1403 | } |
@@ -1415,7 +1409,7 @@ xfs_shift_file_space( | |||
1415 | * into the accessible region of the file. | 1409 | * into the accessible region of the file. |
1416 | */ | 1410 | */ |
1417 | if (xfs_can_free_eofblocks(ip, true)) { | 1411 | if (xfs_can_free_eofblocks(ip, true)) { |
1418 | error = xfs_free_eofblocks(mp, ip, false); | 1412 | error = xfs_free_eofblocks(ip); |
1419 | if (error) | 1413 | if (error) |
1420 | return error; | 1414 | return error; |
1421 | } | 1415 | } |
@@ -1445,21 +1439,14 @@ xfs_shift_file_space( | |||
1445 | } | 1439 | } |
1446 | 1440 | ||
1447 | while (!error && !done) { | 1441 | while (!error && !done) { |
1448 | /* | 1442 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, |
1449 | * We would need to reserve permanent block for transaction. | 1443 | &tp); |
1450 | * This will come into picture when after shifting extent into | ||
1451 | * hole we found that adjacent extents can be merged which | ||
1452 | * may lead to freeing of a block during record update. | ||
1453 | */ | ||
1454 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, | ||
1455 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); | ||
1456 | if (error) | 1444 | if (error) |
1457 | break; | 1445 | break; |
1458 | 1446 | ||
1459 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1447 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1460 | error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, | 1448 | error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, |
1461 | ip->i_gdquot, ip->i_pdquot, | 1449 | ip->i_gdquot, ip->i_pdquot, resblks, 0, |
1462 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, | ||
1463 | XFS_QMOPT_RES_REGBLKS); | 1450 | XFS_QMOPT_RES_REGBLKS); |
1464 | if (error) | 1451 | if (error) |
1465 | goto out_trans_cancel; | 1452 | goto out_trans_cancel; |
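The xfs_shift_file_space() hunks compute resblks once: left shifts reserve XFS_DIOSTRAT_SPACE_RES because a post-shift extent merge may free a block during the record update, while right shifts reserve nothing; the same value then feeds both the transaction and the quota reservation. A sketch of that decision (the reservation size shown is illustrative, not the real geometry-derived value):

#include <stdio.h>

#define DIOSTRAT_SPACE_RES 8	/* illustrative stand-in */

enum shift_dir { SHIFT_LEFT, SHIFT_RIGHT };

/*
 * Only left shifts can merge adjacent extents mid-transaction, so only
 * they carry a block reservation; the result is used for both the
 * transaction reservation and the quota reservation.
 */
static unsigned shift_resblks(enum shift_dir dir)
{
	return dir == SHIFT_LEFT ? DIOSTRAT_SPACE_RES : 0;
}

int main(void)
{
	printf("left=%u right=%u\n",
	       shift_resblks(SHIFT_LEFT), shift_resblks(SHIFT_RIGHT));
	return 0;
}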
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 68a621a8e0c0..135d8267e284 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h | |||
@@ -35,7 +35,7 @@ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, | |||
35 | xfs_fileoff_t start_fsb, xfs_fileoff_t length); | 35 | xfs_fileoff_t start_fsb, xfs_fileoff_t length); |
36 | 36 | ||
37 | /* bmap to userspace formatter - copy to user & advance pointer */ | 37 | /* bmap to userspace formatter - copy to user & advance pointer */ |
38 | typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); | 38 | typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *); |
39 | int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, | 39 | int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, |
40 | xfs_bmap_format_t formatter, void *arg); | 40 | xfs_bmap_format_t formatter, void *arg); |
41 | 41 | ||
@@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, | |||
63 | 63 | ||
64 | /* EOF block manipulation functions */ | 64 | /* EOF block manipulation functions */ |
65 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); | 65 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); |
66 | int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, | 66 | int xfs_free_eofblocks(struct xfs_inode *ip); |
67 | bool need_iolock); | ||
68 | 67 | ||
69 | int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, | 68 | int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, |
70 | struct xfs_swapext *sx); | 69 | struct xfs_swapext *sx); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 2975cb2319f4..0306168af332 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks( | |||
1162 | */ | 1162 | */ |
1163 | bp->b_last_error = 0; | 1163 | bp->b_last_error = 0; |
1164 | bp->b_retries = 0; | 1164 | bp->b_retries = 0; |
1165 | bp->b_first_retry_time = 0; | ||
1165 | 1166 | ||
1166 | xfs_buf_do_callbacks(bp); | 1167 | xfs_buf_do_callbacks(bp); |
1167 | bp->b_fspriv = NULL; | 1168 | bp->b_fspriv = NULL; |
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 4ff499aa7338..d796ffac7296 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -208,32 +208,3 @@ xfs_ioc_trim( | |||
208 | return -EFAULT; | 208 | return -EFAULT; |
209 | return 0; | 209 | return 0; |
210 | } | 210 | } |
211 | |||
212 | int | ||
213 | xfs_discard_extents( | ||
214 | struct xfs_mount *mp, | ||
215 | struct list_head *list) | ||
216 | { | ||
217 | struct xfs_extent_busy *busyp; | ||
218 | int error = 0; | ||
219 | |||
220 | list_for_each_entry(busyp, list, list) { | ||
221 | trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, | ||
222 | busyp->length); | ||
223 | |||
224 | error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, | ||
225 | XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), | ||
226 | XFS_FSB_TO_BB(mp, busyp->length), | ||
227 | GFP_NOFS, 0); | ||
228 | if (error && error != -EOPNOTSUPP) { | ||
229 | xfs_info(mp, | ||
230 | "discard failed for extent [0x%llx,%u], error %d", | ||
231 | (unsigned long long)busyp->bno, | ||
232 | busyp->length, | ||
233 | error); | ||
234 | return error; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | return 0; | ||
239 | } | ||
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h index 344879aea646..0f070f9e44e1 100644 --- a/fs/xfs/xfs_discard.h +++ b/fs/xfs/xfs_discard.h | |||
@@ -5,6 +5,5 @@ struct fstrim_range; | |||
5 | struct list_head; | 5 | struct list_head; |
6 | 6 | ||
7 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | 7 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); |
8 | extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); | ||
9 | 8 | ||
10 | #endif /* XFS_DISCARD_H */ | 9 | #endif /* XFS_DISCARD_H */ |
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 162dc186cf04..77760dbf0242 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c | |||
@@ -45,18 +45,7 @@ xfs_extent_busy_insert( | |||
45 | struct rb_node **rbp; | 45 | struct rb_node **rbp; |
46 | struct rb_node *parent = NULL; | 46 | struct rb_node *parent = NULL; |
47 | 47 | ||
48 | new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); | 48 | new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP); |
49 | if (!new) { | ||
50 | /* | ||
51 | * No Memory! Since it is now not possible to track the free | ||
52 | * block, make this a synchronous transaction to insure that | ||
53 | * the block is not reused before this transaction commits. | ||
54 | */ | ||
55 | trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); | ||
56 | xfs_trans_set_sync(tp); | ||
57 | return; | ||
58 | } | ||
59 | |||
60 | new->agno = agno; | 49 | new->agno = agno; |
61 | new->bno = bno; | 50 | new->bno = bno; |
62 | new->length = len; | 51 | new->length = len; |
@@ -345,25 +334,31 @@ restart: | |||
345 | * subset of the extent that is not busy. If *rlen is smaller than | 334 | * subset of the extent that is not busy. If *rlen is smaller than |
346 | * args->minlen no suitable extent could be found, and the higher level | 335 | * args->minlen no suitable extent could be found, and the higher level |
347 | * code needs to force out the log and retry the allocation. | 336 | * code needs to force out the log and retry the allocation. |
337 | * | ||
338 | * Return the current busy generation for the AG if the extent is busy. This | ||
339 | * value can be used to wait for at least one of the currently busy extents | ||
340 | * to be cleared. Note that the busy list is not guaranteed to be empty after | ||
341 | * the gen is woken. The state of a specific extent must always be confirmed | ||
342 | * with another call to xfs_extent_busy_trim() before it can be used. | ||
348 | */ | 343 | */ |
349 | void | 344 | bool |
350 | xfs_extent_busy_trim( | 345 | xfs_extent_busy_trim( |
351 | struct xfs_alloc_arg *args, | 346 | struct xfs_alloc_arg *args, |
352 | xfs_agblock_t bno, | 347 | xfs_agblock_t *bno, |
353 | xfs_extlen_t len, | 348 | xfs_extlen_t *len, |
354 | xfs_agblock_t *rbno, | 349 | unsigned *busy_gen) |
355 | xfs_extlen_t *rlen) | ||
356 | { | 350 | { |
357 | xfs_agblock_t fbno; | 351 | xfs_agblock_t fbno; |
358 | xfs_extlen_t flen; | 352 | xfs_extlen_t flen; |
359 | struct rb_node *rbp; | 353 | struct rb_node *rbp; |
354 | bool ret = false; | ||
360 | 355 | ||
361 | ASSERT(len > 0); | 356 | ASSERT(*len > 0); |
362 | 357 | ||
363 | spin_lock(&args->pag->pagb_lock); | 358 | spin_lock(&args->pag->pagb_lock); |
364 | restart: | 359 | restart: |
365 | fbno = bno; | 360 | fbno = *bno; |
366 | flen = len; | 361 | flen = *len; |
367 | rbp = args->pag->pagb_tree.rb_node; | 362 | rbp = args->pag->pagb_tree.rb_node; |
368 | while (rbp && flen >= args->minlen) { | 363 | while (rbp && flen >= args->minlen) { |
369 | struct xfs_extent_busy *busyp = | 364 | struct xfs_extent_busy *busyp = |
@@ -515,24 +510,25 @@ restart: | |||
515 | 510 | ||
516 | flen = fend - fbno; | 511 | flen = fend - fbno; |
517 | } | 512 | } |
518 | spin_unlock(&args->pag->pagb_lock); | 513 | out: |
519 | 514 | ||
520 | if (fbno != bno || flen != len) { | 515 | if (fbno != *bno || flen != *len) { |
521 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, | 516 | trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len, |
522 | fbno, flen); | 517 | fbno, flen); |
518 | *bno = fbno; | ||
519 | *len = flen; | ||
520 | *busy_gen = args->pag->pagb_gen; | ||
521 | ret = true; | ||
523 | } | 522 | } |
524 | *rbno = fbno; | 523 | spin_unlock(&args->pag->pagb_lock); |
525 | *rlen = flen; | 524 | return ret; |
526 | return; | ||
527 | fail: | 525 | fail: |
528 | /* | 526 | /* |
529 | * Return a zero extent length as failure indications. All callers | 527 | * Return a zero extent length as failure indications. All callers |
530 | * re-check if the trimmed extent satisfies the minlen requirement. | 528 | * re-check if the trimmed extent satisfies the minlen requirement. |
531 | */ | 529 | */ |
532 | spin_unlock(&args->pag->pagb_lock); | 530 | flen = 0; |
533 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | 531 | goto out; |
534 | *rbno = fbno; | ||
535 | *rlen = 0; | ||
536 | } | 532 | } |
537 | 533 | ||
538 | STATIC void | 534 | STATIC void |
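xfs_extent_busy_trim() now updates the candidate range through in/out pointers, returns whether anything was trimmed, and reports the busy generation the caller can later wait on. A simplified single-extent model of the new contract (the real code walks a per-AG rbtree under pagb_lock and handles several overlap cases, minlen, and userdata specially):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One busy extent; the real code keeps these in a per-AG rbtree. */
struct busy { uint32_t bno, len; };

static bool busy_trim(const struct busy *b, uint32_t *bno, uint32_t *len,
		      unsigned gen_now, unsigned *busy_gen)
{
	uint32_t fbno = *bno, fend = *bno + *len;
	uint32_t bend = b->bno + b->len;

	if (bend > fbno && b->bno < fend) {	/* ranges overlap */
		if (b->bno > fbno)
			fend = b->bno;		/* keep the front part */
		else
			fbno = bend;		/* keep the tail part */
	}
	if (fbno != *bno || fend - fbno != *len) {
		*bno = fbno;
		*len = fend > fbno ? fend - fbno : 0;
		*busy_gen = gen_now;		/* cf. pag->pagb_gen */
		return true;
	}
	return false;
}

int main(void)
{
	struct busy b = { .bno = 10, .len = 5 };
	uint32_t bno = 8, len = 10;	/* [8,18) overlaps busy [10,15) */
	unsigned gen;

	if (busy_trim(&b, &bno, &len, 42, &gen))
		printf("trimmed to [%u,%u) gen=%u\n", bno, bno + len, gen);
	return 0;
}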
@@ -551,6 +547,21 @@ xfs_extent_busy_clear_one( | |||
551 | kmem_free(busyp); | 547 | kmem_free(busyp); |
552 | } | 548 | } |
553 | 549 | ||
550 | static void | ||
551 | xfs_extent_busy_put_pag( | ||
552 | struct xfs_perag *pag, | ||
553 | bool wakeup) | ||
554 | __releases(pag->pagb_lock) | ||
555 | { | ||
556 | if (wakeup) { | ||
557 | pag->pagb_gen++; | ||
558 | wake_up_all(&pag->pagb_wait); | ||
559 | } | ||
560 | |||
561 | spin_unlock(&pag->pagb_lock); | ||
562 | xfs_perag_put(pag); | ||
563 | } | ||
564 | |||
554 | /* | 565 | /* |
555 | * Remove all extents on the passed in list from the busy extents tree. | 566 | * Remove all extents on the passed in list from the busy extents tree. |
556 | * If do_discard is set skip extents that need to be discarded, and mark | 567 | * If do_discard is set skip extents that need to be discarded, and mark |
@@ -565,27 +576,76 @@ xfs_extent_busy_clear( | |||
565 | struct xfs_extent_busy *busyp, *n; | 576 | struct xfs_extent_busy *busyp, *n; |
566 | struct xfs_perag *pag = NULL; | 577 | struct xfs_perag *pag = NULL; |
567 | xfs_agnumber_t agno = NULLAGNUMBER; | 578 | xfs_agnumber_t agno = NULLAGNUMBER; |
579 | bool wakeup = false; | ||
568 | 580 | ||
569 | list_for_each_entry_safe(busyp, n, list, list) { | 581 | list_for_each_entry_safe(busyp, n, list, list) { |
570 | if (busyp->agno != agno) { | 582 | if (busyp->agno != agno) { |
571 | if (pag) { | 583 | if (pag) |
572 | spin_unlock(&pag->pagb_lock); | 584 | xfs_extent_busy_put_pag(pag, wakeup); |
573 | xfs_perag_put(pag); | ||
574 | } | ||
575 | pag = xfs_perag_get(mp, busyp->agno); | ||
576 | spin_lock(&pag->pagb_lock); | ||
577 | agno = busyp->agno; | 585 | agno = busyp->agno; |
586 | pag = xfs_perag_get(mp, agno); | ||
587 | spin_lock(&pag->pagb_lock); | ||
588 | wakeup = false; | ||
578 | } | 589 | } |
579 | 590 | ||
580 | if (do_discard && busyp->length && | 591 | if (do_discard && busyp->length && |
581 | !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) | 592 | !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) { |
582 | busyp->flags = XFS_EXTENT_BUSY_DISCARDED; | 593 | busyp->flags = XFS_EXTENT_BUSY_DISCARDED; |
583 | else | 594 | } else { |
584 | xfs_extent_busy_clear_one(mp, pag, busyp); | 595 | xfs_extent_busy_clear_one(mp, pag, busyp); |
596 | wakeup = true; | ||
597 | } | ||
585 | } | 598 | } |
586 | 599 | ||
587 | if (pag) { | 600 | if (pag) |
588 | spin_unlock(&pag->pagb_lock); | 601 | xfs_extent_busy_put_pag(pag, wakeup); |
602 | } | ||
603 | |||
604 | /* | ||
605 | * Flush out all busy extents for this AG. | ||
606 | */ | ||
607 | void | ||
608 | xfs_extent_busy_flush( | ||
609 | struct xfs_mount *mp, | ||
610 | struct xfs_perag *pag, | ||
611 | unsigned busy_gen) | ||
612 | { | ||
613 | DEFINE_WAIT (wait); | ||
614 | int log_flushed = 0, error; | ||
615 | |||
616 | trace_xfs_log_force(mp, 0, _THIS_IP_); | ||
617 | error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed); | ||
618 | if (error) | ||
619 | return; | ||
620 | |||
621 | do { | ||
622 | prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); | ||
623 | if (busy_gen != READ_ONCE(pag->pagb_gen)) | ||
624 | break; | ||
625 | schedule(); | ||
626 | } while (1); | ||
627 | |||
628 | finish_wait(&pag->pagb_wait, &wait); | ||
629 | } | ||
630 | |||
631 | void | ||
632 | xfs_extent_busy_wait_all( | ||
633 | struct xfs_mount *mp) | ||
634 | { | ||
635 | DEFINE_WAIT (wait); | ||
636 | xfs_agnumber_t agno; | ||
637 | |||
638 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | ||
639 | struct xfs_perag *pag = xfs_perag_get(mp, agno); | ||
640 | |||
641 | do { | ||
642 | prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); | ||
643 | if (RB_EMPTY_ROOT(&pag->pagb_tree)) | ||
644 | break; | ||
645 | schedule(); | ||
646 | } while (1); | ||
647 | finish_wait(&pag->pagb_wait, &wait); | ||
648 | |||
589 | xfs_perag_put(pag); | 649 | xfs_perag_put(pag); |
590 | } | 650 | } |
591 | } | 651 | } |
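The new xfs_extent_busy_flush()/xfs_extent_busy_wait_all() pair sleeps on a per-AG wait queue keyed by a generation counter: clearing any busy extent bumps pagb_gen and wakes all waiters, and a waiter must re-check the tree afterwards because the wakeup only promises progress, not emptiness. A userspace model of the sample-then-wait pattern using a condition variable (compile with -pthread; all names are stand-ins for the kernel's wait-queue code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	cond = PTHREAD_COND_INITIALIZER;
static unsigned		busy_gen;	/* cf. pag->pagb_gen */

/* Sleep until the generation moves past the one the caller sampled. */
static void busy_flush_wait(unsigned seen_gen)
{
	pthread_mutex_lock(&lock);
	while (busy_gen == seen_gen)	/* nothing freed since we looked */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

/* Clearing at least one busy extent bumps the gen and wakes everyone. */
static void busy_clear(void)
{
	pthread_mutex_lock(&lock);
	busy_gen++;
	pthread_cond_broadcast(&cond);	/* cf. wake_up_all(&pag->pagb_wait) */
	pthread_mutex_unlock(&lock);
}

static void *clearer(void *arg)
{
	(void)arg;
	busy_clear();
	return NULL;
}

int main(void)
{
	pthread_t t;
	unsigned seen = busy_gen;

	pthread_create(&t, NULL, clearer, NULL);
	busy_flush_wait(seen);		/* returns once the gen advances */
	pthread_join(t, NULL);
	puts("generation advanced");
	return 0;
}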
@@ -596,9 +656,17 @@ xfs_extent_busy_clear( | |||
596 | int | 656 | int |
597 | xfs_extent_busy_ag_cmp( | 657 | xfs_extent_busy_ag_cmp( |
598 | void *priv, | 658 | void *priv, |
599 | struct list_head *a, | 659 | struct list_head *l1, |
600 | struct list_head *b) | 660 | struct list_head *l2) |
601 | { | 661 | { |
602 | return container_of(a, struct xfs_extent_busy, list)->agno - | 662 | struct xfs_extent_busy *b1 = |
603 | container_of(b, struct xfs_extent_busy, list)->agno; | 663 | container_of(l1, struct xfs_extent_busy, list); |
664 | struct xfs_extent_busy *b2 = | ||
665 | container_of(l2, struct xfs_extent_busy, list); | ||
666 | s32 diff; | ||
667 | |||
668 | diff = b1->agno - b2->agno; | ||
669 | if (!diff) | ||
670 | diff = b1->bno - b2->bno; | ||
671 | return diff; | ||
604 | } | 672 | } |
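xfs_extent_busy_ag_cmp() now breaks agno ties by block number, so a sorted busy list walks each AG in ascending block order, which is useful when discards are issued in list order. A sketch of the comparator with qsort() (using explicit comparisons rather than the a - b subtraction idiom sidesteps its overflow hazard):

#include <stdio.h>
#include <stdlib.h>

/* Minimal busy-extent record for sorting purposes. */
struct busy { unsigned agno, bno; };

/* Primary key: allocation group; secondary key: block within the AG. */
static int busy_cmp(const void *a, const void *b)
{
	const struct busy *b1 = a, *b2 = b;

	if (b1->agno != b2->agno)
		return b1->agno < b2->agno ? -1 : 1;
	if (b1->bno != b2->bno)
		return b1->bno < b2->bno ? -1 : 1;
	return 0;
}

int main(void)
{
	struct busy v[] = { {1, 50}, {0, 9}, {1, 7}, {0, 3} };
	int i;

	qsort(v, 4, sizeof(v[0]), busy_cmp);
	for (i = 0; i < 4; i++)
		printf("(%u,%u) ", v[i].agno, v[i].bno);
	putchar('\n');	/* (0,3) (0,9) (1,7) (1,50) */
	return 0;
}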
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index bfff284d2dcc..60195ea1b84a 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h | |||
@@ -58,9 +58,16 @@ void | |||
58 | xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | 58 | xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, |
59 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | 59 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); |
60 | 60 | ||
61 | bool | ||
62 | xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, | ||
63 | xfs_extlen_t *len, unsigned *busy_gen); | ||
64 | |||
65 | void | ||
66 | xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, | ||
67 | unsigned busy_gen); | ||
68 | |||
61 | void | 69 | void |
62 | xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno, | 70 | xfs_extent_busy_wait_all(struct xfs_mount *mp); |
63 | xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen); | ||
64 | 71 | ||
65 | int | 72 | int |
66 | xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | 73 | xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index bbb9eb6811b2..086440e79b86 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -527,6 +527,15 @@ xfs_file_dio_aio_write( | |||
527 | if ((iocb->ki_pos & mp->m_blockmask) || | 527 | if ((iocb->ki_pos & mp->m_blockmask) || |
528 | ((iocb->ki_pos + count) & mp->m_blockmask)) { | 528 | ((iocb->ki_pos + count) & mp->m_blockmask)) { |
529 | unaligned_io = 1; | 529 | unaligned_io = 1; |
530 | |||
531 | /* | ||
532 | * We can't properly handle unaligned direct I/O to reflink | ||
533 | * files yet, as we can't unshare a partial block. | ||
534 | */ | ||
535 | if (xfs_is_reflink_inode(ip)) { | ||
536 | trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); | ||
537 | return -EREMCHG; | ||
538 | } | ||
530 | iolock = XFS_IOLOCK_EXCL; | 539 | iolock = XFS_IOLOCK_EXCL; |
531 | } else { | 540 | } else { |
532 | iolock = XFS_IOLOCK_SHARED; | 541 | iolock = XFS_IOLOCK_SHARED; |
@@ -552,14 +561,6 @@ xfs_file_dio_aio_write( | |||
552 | } | 561 | } |
553 | 562 | ||
554 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos); | 563 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos); |
555 | |||
556 | /* If this is a block-aligned directio CoW, remap immediately. */ | ||
557 | if (xfs_is_reflink_inode(ip) && !unaligned_io) { | ||
558 | ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count); | ||
559 | if (ret) | ||
560 | goto out; | ||
561 | } | ||
562 | |||
563 | ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); | 564 | ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); |
564 | out: | 565 | out: |
565 | xfs_iunlock(ip, iolock); | 566 | xfs_iunlock(ip, iolock); |
@@ -614,8 +615,10 @@ xfs_file_buffered_aio_write( | |||
614 | struct xfs_inode *ip = XFS_I(inode); | 615 | struct xfs_inode *ip = XFS_I(inode); |
615 | ssize_t ret; | 616 | ssize_t ret; |
616 | int enospc = 0; | 617 | int enospc = 0; |
617 | int iolock = XFS_IOLOCK_EXCL; | 618 | int iolock; |
618 | 619 | ||
620 | write_retry: | ||
621 | iolock = XFS_IOLOCK_EXCL; | ||
619 | xfs_ilock(ip, iolock); | 622 | xfs_ilock(ip, iolock); |
620 | 623 | ||
621 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); | 624 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); |
@@ -625,7 +628,6 @@ xfs_file_buffered_aio_write( | |||
625 | /* We can write back this queue in page reclaim */ | 628 | /* We can write back this queue in page reclaim */ |
626 | current->backing_dev_info = inode_to_bdi(inode); | 629 | current->backing_dev_info = inode_to_bdi(inode); |
627 | 630 | ||
628 | write_retry: | ||
629 | trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); | 631 | trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); |
630 | ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); | 632 | ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); |
631 | if (likely(ret >= 0)) | 633 | if (likely(ret >= 0)) |
@@ -641,18 +643,21 @@ write_retry: | |||
641 | * running at the same time. | 643 | * running at the same time. |
642 | */ | 644 | */ |
643 | if (ret == -EDQUOT && !enospc) { | 645 | if (ret == -EDQUOT && !enospc) { |
646 | xfs_iunlock(ip, iolock); | ||
644 | enospc = xfs_inode_free_quota_eofblocks(ip); | 647 | enospc = xfs_inode_free_quota_eofblocks(ip); |
645 | if (enospc) | 648 | if (enospc) |
646 | goto write_retry; | 649 | goto write_retry; |
647 | enospc = xfs_inode_free_quota_cowblocks(ip); | 650 | enospc = xfs_inode_free_quota_cowblocks(ip); |
648 | if (enospc) | 651 | if (enospc) |
649 | goto write_retry; | 652 | goto write_retry; |
653 | iolock = 0; | ||
650 | } else if (ret == -ENOSPC && !enospc) { | 654 | } else if (ret == -ENOSPC && !enospc) { |
651 | struct xfs_eofblocks eofb = {0}; | 655 | struct xfs_eofblocks eofb = {0}; |
652 | 656 | ||
653 | enospc = 1; | 657 | enospc = 1; |
654 | xfs_flush_inodes(ip->i_mount); | 658 | xfs_flush_inodes(ip->i_mount); |
655 | eofb.eof_scan_owner = ip->i_ino; /* for locking */ | 659 | |
660 | xfs_iunlock(ip, iolock); | ||
656 | eofb.eof_flags = XFS_EOF_FLAGS_SYNC; | 661 | eofb.eof_flags = XFS_EOF_FLAGS_SYNC; |
657 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); | 662 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); |
658 | goto write_retry; | 663 | goto write_retry; |
@@ -660,7 +665,8 @@ write_retry: | |||
660 | 665 | ||
661 | current->backing_dev_info = NULL; | 666 | current->backing_dev_info = NULL; |
662 | out: | 667 | out: |
663 | xfs_iunlock(ip, iolock); | 668 | if (iolock) |
669 | xfs_iunlock(ip, iolock); | ||
664 | return ret; | 670 | return ret; |
665 | } | 671 | } |
666 | 672 | ||
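The xfs_file_buffered_aio_write() hunks move the write_retry label above the lock acquisition and drop the iolock before running the eofblocks/cowblocks scans, because those scans themselves take the iolock of the inode being written. A userspace model of the drop-lock-then-retry shape (a pthread mutex stands in for the XFS iolock; compile with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;
static int space_freed;		/* stand-in for reclaim having run */

static int do_write(void)
{
	/* First attempt fails for lack of space; later attempts succeed. */
	return space_freed ? 0 : -ENOSPC;
}

static void free_some_space(void)
{
	space_freed = 1;	/* pretend the scan released blocks */
}

/*
 * Retry loop: the lock is dropped before reclaim and re-taken at the
 * top of the loop, rather than retrying while still holding it.
 */
static int buffered_write(void)
{
	int ret, retried = 0;

write_retry:
	pthread_mutex_lock(&iolock);
	ret = do_write();
	if (ret == -ENOSPC && !retried) {
		retried = 1;
		pthread_mutex_unlock(&iolock);	/* drop before reclaim */
		free_some_space();
		goto write_retry;
	}
	pthread_mutex_unlock(&iolock);
	return ret;
}

int main(void)
{
	printf("ret=%d\n", buffered_write());
	return 0;
}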
@@ -908,9 +914,9 @@ xfs_dir_open( | |||
908 | */ | 914 | */ |
909 | mode = xfs_ilock_data_map_shared(ip); | 915 | mode = xfs_ilock_data_map_shared(ip); |
910 | if (ip->i_d.di_nextents > 0) | 916 | if (ip->i_d.di_nextents > 0) |
911 | xfs_dir3_data_readahead(ip, 0, -1); | 917 | error = xfs_dir3_data_readahead(ip, 0, -1); |
912 | xfs_iunlock(ip, mode); | 918 | xfs_iunlock(ip, mode); |
913 | return 0; | 919 | return error; |
914 | } | 920 | } |
915 | 921 | ||
916 | STATIC int | 922 | STATIC int |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 242e8091296d..6ccaae9eb0ee 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -352,12 +352,7 @@ xfs_growfs_data_private( | |||
352 | goto error0; | 352 | goto error0; |
353 | } | 353 | } |
354 | 354 | ||
355 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 355 | xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0); |
356 | xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1, | ||
357 | agno, XFS_BTREE_CRC_BLOCKS); | ||
358 | else | ||
359 | xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, | ||
360 | agno, 0); | ||
361 | 356 | ||
362 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); | 357 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
363 | arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); | 358 | arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); |
@@ -381,12 +376,7 @@ xfs_growfs_data_private( | |||
381 | goto error0; | 376 | goto error0; |
382 | } | 377 | } |
383 | 378 | ||
384 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 379 | xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0); |
385 | xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1, | ||
386 | agno, XFS_BTREE_CRC_BLOCKS); | ||
387 | else | ||
388 | xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, | ||
389 | agno, 0); | ||
390 | 380 | ||
391 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); | 381 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
392 | arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); | 382 | arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); |
@@ -413,8 +403,8 @@ xfs_growfs_data_private( | |||
413 | goto error0; | 403 | goto error0; |
414 | } | 404 | } |
415 | 405 | ||
416 | xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0, | 406 | xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0, |
417 | agno, XFS_BTREE_CRC_BLOCKS); | 407 | agno, 0); |
418 | block = XFS_BUF_TO_BLOCK(bp); | 408 | block = XFS_BUF_TO_BLOCK(bp); |
419 | 409 | ||
420 | 410 | ||
@@ -488,12 +478,7 @@ xfs_growfs_data_private( | |||
488 | goto error0; | 478 | goto error0; |
489 | } | 479 | } |
490 | 480 | ||
491 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 481 | xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0); |
492 | xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0, | ||
493 | agno, XFS_BTREE_CRC_BLOCKS); | ||
494 | else | ||
495 | xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, | ||
496 | agno, 0); | ||
497 | 482 | ||
498 | error = xfs_bwrite(bp); | 483 | error = xfs_bwrite(bp); |
499 | xfs_buf_relse(bp); | 484 | xfs_buf_relse(bp); |
@@ -513,13 +498,8 @@ xfs_growfs_data_private( | |||
513 | goto error0; | 498 | goto error0; |
514 | } | 499 | } |
515 | 500 | ||
516 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 501 | xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO, |
517 | xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, | 502 | 0, 0, agno, 0); |
518 | 0, 0, agno, | ||
519 | XFS_BTREE_CRC_BLOCKS); | ||
520 | else | ||
521 | xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, | ||
522 | 0, agno, 0); | ||
523 | 503 | ||
524 | error = xfs_bwrite(bp); | 504 | error = xfs_bwrite(bp); |
525 | xfs_buf_relse(bp); | 505 | xfs_buf_relse(bp); |
@@ -540,9 +520,8 @@ xfs_growfs_data_private( | |||
540 | goto error0; | 520 | goto error0; |
541 | } | 521 | } |
542 | 522 | ||
543 | xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC, | 523 | xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC, |
544 | 0, 0, agno, | 524 | 0, 0, agno, 0); |
545 | XFS_BTREE_CRC_BLOCKS); | ||
546 | 525 | ||
547 | error = xfs_bwrite(bp); | 526 | error = xfs_bwrite(bp); |
548 | xfs_buf_relse(bp); | 527 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 70ca4f608321..7234b9748c36 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -1322,13 +1322,10 @@ xfs_inode_free_eofblocks( | |||
1322 | int flags, | 1322 | int flags, |
1323 | void *args) | 1323 | void *args) |
1324 | { | 1324 | { |
1325 | int ret; | 1325 | int ret = 0; |
1326 | struct xfs_eofblocks *eofb = args; | 1326 | struct xfs_eofblocks *eofb = args; |
1327 | bool need_iolock = true; | ||
1328 | int match; | 1327 | int match; |
1329 | 1328 | ||
1330 | ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); | ||
1331 | |||
1332 | if (!xfs_can_free_eofblocks(ip, false)) { | 1329 | if (!xfs_can_free_eofblocks(ip, false)) { |
1333 | /* inode could be preallocated or append-only */ | 1330 | /* inode could be preallocated or append-only */ |
1334 | trace_xfs_inode_free_eofblocks_invalid(ip); | 1331 | trace_xfs_inode_free_eofblocks_invalid(ip); |
@@ -1356,21 +1353,19 @@ xfs_inode_free_eofblocks( | |||
1356 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | 1353 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && |
1357 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | 1354 | XFS_ISIZE(ip) < eofb->eof_min_file_size) |
1358 | return 0; | 1355 | return 0; |
1359 | |||
1360 | /* | ||
1361 | * A scan owner implies we already hold the iolock. Skip it in | ||
1362 | * xfs_free_eofblocks() to avoid deadlock. This also eliminates | ||
1363 | * the possibility of EAGAIN being returned. | ||
1364 | */ | ||
1365 | if (eofb->eof_scan_owner == ip->i_ino) | ||
1366 | need_iolock = false; | ||
1367 | } | 1356 | } |
1368 | 1357 | ||
1369 | ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); | 1358 | /* |
1370 | 1359 | * If the caller is waiting, return -EAGAIN to keep the background | |
1371 | /* don't revisit the inode if we're not waiting */ | 1360 | * scanner moving and revisit the inode in a subsequent pass. |
1372 | if (ret == -EAGAIN && !(flags & SYNC_WAIT)) | 1361 | */ |
1373 | ret = 0; | 1362 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { |
1363 | if (flags & SYNC_WAIT) | ||
1364 | ret = -EAGAIN; | ||
1365 | return ret; | ||
1366 | } | ||
1367 | ret = xfs_free_eofblocks(ip); | ||
1368 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1374 | 1369 | ||
1375 | return ret; | 1370 | return ret; |
1376 | } | 1371 | } |
@@ -1417,15 +1412,10 @@ __xfs_inode_free_quota_eofblocks( | |||
1417 | struct xfs_eofblocks eofb = {0}; | 1412 | struct xfs_eofblocks eofb = {0}; |
1418 | struct xfs_dquot *dq; | 1413 | struct xfs_dquot *dq; |
1419 | 1414 | ||
1420 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
1421 | |||
1422 | /* | 1415 | /* |
1423 | * Set the scan owner to avoid a potential livelock. Otherwise, the scan | 1416 | * Run a sync scan to increase effectiveness and use the union filter to |
1424 | * can repeatedly trylock on the inode we're currently processing. We | ||
1425 | * run a sync scan to increase effectiveness and use the union filter to | ||
1426 | * cover all applicable quotas in a single scan. | 1417 | * cover all applicable quotas in a single scan. |
1427 | */ | 1418 | */ |
1428 | eofb.eof_scan_owner = ip->i_ino; | ||
1429 | eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; | 1419 | eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; |
1430 | 1420 | ||
1431 | if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { | 1421 | if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { |
@@ -1577,12 +1567,9 @@ xfs_inode_free_cowblocks( | |||
1577 | { | 1567 | { |
1578 | int ret; | 1568 | int ret; |
1579 | struct xfs_eofblocks *eofb = args; | 1569 | struct xfs_eofblocks *eofb = args; |
1580 | bool need_iolock = true; | ||
1581 | int match; | 1570 | int match; |
1582 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | 1571 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
1583 | 1572 | ||
1584 | ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); | ||
1585 | |||
1586 | /* | 1573 | /* |
1587 | * Just clear the tag if we have an empty cow fork or none at all. It's | 1574 | * Just clear the tag if we have an empty cow fork or none at all. It's |
1588 | * possible the inode was fully unshared since it was originally tagged. | 1575 | * possible the inode was fully unshared since it was originally tagged. |
@@ -1615,28 +1602,16 @@ xfs_inode_free_cowblocks( | |||
1615 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | 1602 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && |
1616 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | 1603 | XFS_ISIZE(ip) < eofb->eof_min_file_size) |
1617 | return 0; | 1604 | return 0; |
1618 | |||
1619 | /* | ||
1620 | * A scan owner implies we already hold the iolock. Skip it in | ||
1621 | * xfs_free_eofblocks() to avoid deadlock. This also eliminates | ||
1622 | * the possibility of EAGAIN being returned. | ||
1623 | */ | ||
1624 | if (eofb->eof_scan_owner == ip->i_ino) | ||
1625 | need_iolock = false; | ||
1626 | } | 1605 | } |
1627 | 1606 | ||
1628 | /* Free the CoW blocks */ | 1607 | /* Free the CoW blocks */ |
1629 | if (need_iolock) { | 1608 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
1630 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 1609 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); |
1631 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
1632 | } | ||
1633 | 1610 | ||
1634 | ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); | 1611 | ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); |
1635 | 1612 | ||
1636 | if (need_iolock) { | 1613 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
1637 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); | 1614 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
1638 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1639 | } | ||
1640 | 1615 | ||
1641 | return ret; | 1616 | return ret; |
1642 | } | 1617 | } |
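With eof_scan_owner gone, the scanners take the iolock themselves, but non-blocking. Below is a minimal sketch of the pattern, using a pthread mutex to stand in for the XFS iolock; the names and lock type are illustrative, not the kernel API.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define SYNC_WAIT 0x0001

static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;

static int free_eofblocks_one(int flags)
{
    int ret = 0;

    if (pthread_mutex_trylock(&iolock) != 0) {
        /*
         * Contended: a sync scan reports -EAGAIN so the walk retries
         * the inode; a background scan just moves on, and since the
         * inode stays tagged it will be revisited on a later pass.
         */
        if (flags & SYNC_WAIT)
            ret = -EAGAIN;
        return ret;
    }
    /* ... the xfs_free_eofblocks() work would happen here ... */
    pthread_mutex_unlock(&iolock);
    return ret;
}

int main(void)
{
    pthread_mutex_lock(&iolock);    /* simulate a racing writer */
    printf("background scan: %d\n", free_eofblocks_one(0));
    printf("sync scan:       %d\n", free_eofblocks_one(SYNC_WAIT));
    pthread_mutex_unlock(&iolock);
    return 0;
}

The asymmetry is the point of the new comment in the hunk: a blocking background scan would stall behind ordinary writers, while a sync scan must not silently skip inodes, so only the latter propagates -EAGAIN.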
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index a1e02f4708ab..8a7c849b4dea 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -27,7 +27,6 @@ struct xfs_eofblocks { | |||
27 | kgid_t eof_gid; | 27 | kgid_t eof_gid; |
28 | prid_t eof_prid; | 28 | prid_t eof_prid; |
29 | __u64 eof_min_file_size; | 29 | __u64 eof_min_file_size; |
30 | xfs_ino_t eof_scan_owner; | ||
31 | }; | 30 | }; |
32 | 31 | ||
33 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user( | |||
102 | dst->eof_flags = src->eof_flags; | 101 | dst->eof_flags = src->eof_flags; |
103 | dst->eof_prid = src->eof_prid; | 102 | dst->eof_prid = src->eof_prid; |
104 | dst->eof_min_file_size = src->eof_min_file_size; | 103 | dst->eof_min_file_size = src->eof_min_file_size; |
105 | dst->eof_scan_owner = NULLFSINO; | ||
106 | 104 | ||
107 | dst->eof_uid = INVALID_UID; | 105 | dst->eof_uid = INVALID_UID; |
108 | if (src->eof_flags & XFS_EOF_FLAGS_UID) { | 106 | if (src->eof_flags & XFS_EOF_FLAGS_UID) { |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index de32f0fe47c8..edfa6a55b064 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1692,32 +1692,34 @@ xfs_release( | |||
1692 | if (xfs_can_free_eofblocks(ip, false)) { | 1692 | if (xfs_can_free_eofblocks(ip, false)) { |
1693 | 1693 | ||
1694 | /* | 1694 | /* |
1695 | * Check if the inode is being opened, written and closed | ||
1696 | * frequently and we have delayed allocation blocks outstanding | ||
1697 | * (e.g. streaming writes from the NFS server), truncating the | ||
1698 | * blocks past EOF will cause fragmentation to occur. | ||
1699 | * | ||
1700 | * In this case don't do the truncation, but we have to be | ||
1701 | * careful how we detect this case. Blocks beyond EOF show up as | ||
1702 | * i_delayed_blks even when the inode is clean, so we need to | ||
1703 | * truncate them away first before checking for a dirty release. | ||
1704 | * Hence on the first dirty close we will still remove the | ||
1705 | * speculative allocation, but after that we will leave it in | ||
1706 | * place. | ||
1707 | */ | ||
1708 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
1709 | return 0; | ||
1710 | /* | ||
1695 | * If we can't get the iolock just skip truncating the blocks | 1711 | * If we can't get the iolock just skip truncating the blocks |
1696 | * past EOF because we could deadlock with the mmap_sem | 1712 | * past EOF because we could deadlock with the mmap_sem |
1697 | * otherwise. We'll get another chance to drop them once the | 1713 | * otherwise. We'll get another chance to drop them once the |
1698 | * last reference to the inode is dropped, so we'll never leak | 1714 | * last reference to the inode is dropped, so we'll never leak |
1699 | * blocks permanently. | 1715 | * blocks permanently. |
1700 | * | ||
1701 | * Further, check if the inode is being opened, written and | ||
1702 | * closed frequently and we have delayed allocation blocks | ||
1703 | * outstanding (e.g. streaming writes from the NFS server), | ||
1704 | * truncating the blocks past EOF will cause fragmentation to | ||
1705 | * occur. | ||
1706 | * | ||
1707 | * In this case don't do the truncation, either, but we have to | ||
1708 | * be careful how we detect this case. Blocks beyond EOF show | ||
1709 | * up as i_delayed_blks even when the inode is clean, so we | ||
1710 | * need to truncate them away first before checking for a dirty | ||
1711 | * release. Hence on the first dirty close we will still remove | ||
1712 | * the speculative allocation, but after that we will leave it | ||
1713 | * in place. | ||
1714 | */ | 1716 | */ |
1715 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | 1717 | if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { |
1716 | return 0; | 1718 | error = xfs_free_eofblocks(ip); |
1717 | 1719 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | |
1718 | error = xfs_free_eofblocks(mp, ip, true); | 1720 | if (error) |
1719 | if (error && error != -EAGAIN) | 1721 | return error; |
1720 | return error; | 1722 | } |
1721 | 1723 | ||
1722 | /* delalloc blocks after truncation means it really is dirty */ | 1724 | /* delalloc blocks after truncation means it really is dirty */ |
1723 | if (ip->i_delayed_blks) | 1725 | if (ip->i_delayed_blks) |
@@ -1904,8 +1906,11 @@ xfs_inactive( | |||
1904 | * cache. Post-eof blocks must be freed, lest we end up with | 1906 | * cache. Post-eof blocks must be freed, lest we end up with |
1905 | * broken free space accounting. | 1907 | * broken free space accounting. |
1906 | */ | 1908 | */ |
1907 | if (xfs_can_free_eofblocks(ip, true)) | 1909 | if (xfs_can_free_eofblocks(ip, true)) { |
1908 | xfs_free_eofblocks(mp, ip, false); | 1910 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
1911 | xfs_free_eofblocks(ip); | ||
1912 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1913 | } | ||
1909 | 1914 | ||
1910 | return; | 1915 | return; |
1911 | } | 1916 | } |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c67cfb451fd3..cf1363dbf32b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -1524,7 +1524,7 @@ out_drop_write: | |||
1524 | } | 1524 | } |
1525 | 1525 | ||
1526 | STATIC int | 1526 | STATIC int |
1527 | xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) | 1527 | xfs_getbmap_format(void **ap, struct getbmapx *bmv) |
1528 | { | 1528 | { |
1529 | struct getbmap __user *base = (struct getbmap __user *)*ap; | 1529 | struct getbmap __user *base = (struct getbmap __user *)*ap; |
1530 | 1530 | ||
@@ -1567,7 +1567,7 @@ xfs_ioc_getbmap( | |||
1567 | } | 1567 | } |
1568 | 1568 | ||
1569 | STATIC int | 1569 | STATIC int |
1570 | xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) | 1570 | xfs_getbmapx_format(void **ap, struct getbmapx *bmv) |
1571 | { | 1571 | { |
1572 | struct getbmapx __user *base = (struct getbmapx __user *)*ap; | 1572 | struct getbmapx __user *base = (struct getbmapx __user *)*ap; |
1573 | 1573 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1aa3abd67b36..41662fb14e87 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -162,7 +162,7 @@ xfs_iomap_write_direct( | |||
162 | xfs_fileoff_t last_fsb; | 162 | xfs_fileoff_t last_fsb; |
163 | xfs_filblks_t count_fsb, resaligned; | 163 | xfs_filblks_t count_fsb, resaligned; |
164 | xfs_fsblock_t firstfsb; | 164 | xfs_fsblock_t firstfsb; |
165 | xfs_extlen_t extsz, temp; | 165 | xfs_extlen_t extsz; |
166 | int nimaps; | 166 | int nimaps; |
167 | int quota_flag; | 167 | int quota_flag; |
168 | int rt; | 168 | int rt; |
@@ -203,14 +203,7 @@ xfs_iomap_write_direct( | |||
203 | } | 203 | } |
204 | count_fsb = last_fsb - offset_fsb; | 204 | count_fsb = last_fsb - offset_fsb; |
205 | ASSERT(count_fsb > 0); | 205 | ASSERT(count_fsb > 0); |
206 | 206 | resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz); | |
207 | resaligned = count_fsb; | ||
208 | if (unlikely(extsz)) { | ||
209 | if ((temp = do_mod(offset_fsb, extsz))) | ||
210 | resaligned += temp; | ||
211 | if ((temp = do_mod(resaligned, extsz))) | ||
212 | resaligned += extsz - temp; | ||
213 | } | ||
214 | 207 | ||
215 | if (unlikely(rt)) { | 208 | if (unlikely(rt)) { |
216 | resrtextents = qblocks = resaligned; | 209 | resrtextents = qblocks = resaligned; |
@@ -685,7 +678,7 @@ xfs_iomap_write_allocate( | |||
685 | int nres; | 678 | int nres; |
686 | 679 | ||
687 | if (whichfork == XFS_COW_FORK) | 680 | if (whichfork == XFS_COW_FORK) |
688 | flags |= XFS_BMAPI_COWFORK; | 681 | flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; |
689 | 682 | ||
690 | /* | 683 | /* |
691 | * Make sure that the dquots are there. | 684 | * Make sure that the dquots are there. |
@@ -1002,47 +995,31 @@ xfs_file_iomap_begin( | |||
1002 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 995 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
1003 | end_fsb = XFS_B_TO_FSB(mp, offset + length); | 996 | end_fsb = XFS_B_TO_FSB(mp, offset + length); |
1004 | 997 | ||
1005 | if (xfs_is_reflink_inode(ip) && | ||
1006 | (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) { | ||
1007 | shared = xfs_reflink_find_cow_mapping(ip, offset, &imap); | ||
1008 | if (shared) { | ||
1009 | xfs_iunlock(ip, lockmode); | ||
1010 | goto alloc_done; | ||
1011 | } | ||
1012 | ASSERT(!isnullstartblock(imap.br_startblock)); | ||
1013 | } | ||
1014 | |||
1015 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, | 998 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, |
1016 | &nimaps, 0); | 999 | &nimaps, 0); |
1017 | if (error) | 1000 | if (error) |
1018 | goto out_unlock; | 1001 | goto out_unlock; |
1019 | 1002 | ||
1020 | if ((flags & IOMAP_REPORT) || | 1003 | if (flags & IOMAP_REPORT) { |
1021 | (xfs_is_reflink_inode(ip) && | ||
1022 | (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) { | ||
1023 | /* Trim the mapping to the nearest shared extent boundary. */ | 1004 | /* Trim the mapping to the nearest shared extent boundary. */ |
1024 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, | 1005 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, |
1025 | &trimmed); | 1006 | &trimmed); |
1026 | if (error) | 1007 | if (error) |
1027 | goto out_unlock; | 1008 | goto out_unlock; |
1028 | |||
1029 | /* | ||
1030 | * We're here because we're trying to do a directio write to a | ||
1031 | * region that isn't aligned to a filesystem block. If the | ||
1032 | * extent is shared, fall back to buffered mode to handle the | ||
1033 | * RMW. | ||
1034 | */ | ||
1035 | if (!(flags & IOMAP_REPORT) && shared) { | ||
1036 | trace_xfs_reflink_bounce_dio_write(ip, &imap); | ||
1037 | error = -EREMCHG; | ||
1038 | goto out_unlock; | ||
1039 | } | ||
1040 | } | 1009 | } |
1041 | 1010 | ||
1042 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { | 1011 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { |
1043 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); | 1012 | if (flags & IOMAP_DIRECT) { |
1044 | if (error) | 1013 | /* may drop and re-acquire the ilock */ |
1045 | goto out_unlock; | 1014 | error = xfs_reflink_allocate_cow(ip, &imap, &shared, |
1015 | &lockmode); | ||
1016 | if (error) | ||
1017 | goto out_unlock; | ||
1018 | } else { | ||
1019 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); | ||
1020 | if (error) | ||
1021 | goto out_unlock; | ||
1022 | } | ||
1046 | 1023 | ||
1047 | end_fsb = imap.br_startoff + imap.br_blockcount; | 1024 | end_fsb = imap.br_startoff + imap.br_blockcount; |
1048 | length = XFS_FSB_TO_B(mp, end_fsb) - offset; | 1025 | length = XFS_FSB_TO_B(mp, end_fsb) - offset; |
@@ -1071,7 +1048,6 @@ xfs_file_iomap_begin( | |||
1071 | if (error) | 1048 | if (error) |
1072 | return error; | 1049 | return error; |
1073 | 1050 | ||
1074 | alloc_done: | ||
1075 | iomap->flags = IOMAP_F_NEW; | 1051 | iomap->flags = IOMAP_F_NEW; |
1076 | trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); | 1052 | trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); |
1077 | } else { | 1053 | } else { |
@@ -1102,7 +1078,19 @@ xfs_file_iomap_end_delalloc( | |||
1102 | xfs_fileoff_t end_fsb; | 1078 | xfs_fileoff_t end_fsb; |
1103 | int error = 0; | 1079 | int error = 0; |
1104 | 1080 | ||
1105 | start_fsb = XFS_B_TO_FSB(mp, offset + written); | 1081 | /* behave as if the write failed if drop writes is enabled */ |
1082 | if (xfs_mp_drop_writes(mp)) | ||
1083 | written = 0; | ||
1084 | |||
1085 | /* | ||
1086 | * start_fsb refers to the first unused block after a short write. If | ||
1087 | * nothing was written, round offset down to point at the first block in | ||
1088 | * the range. | ||
1089 | */ | ||
1090 | if (unlikely(!written)) | ||
1091 | start_fsb = XFS_B_TO_FSBT(mp, offset); | ||
1092 | else | ||
1093 | start_fsb = XFS_B_TO_FSB(mp, offset + written); | ||
1106 | end_fsb = XFS_B_TO_FSB(mp, offset + length); | 1094 | end_fsb = XFS_B_TO_FSB(mp, offset + length); |
1107 | 1095 | ||
1108 | /* | 1096 | /* |
@@ -1114,6 +1102,9 @@ xfs_file_iomap_end_delalloc( | |||
1114 | * blocks in the range, they are ours. | 1102 | * blocks in the range, they are ours. |
1115 | */ | 1103 | */ |
1116 | if (start_fsb < end_fsb) { | 1104 | if (start_fsb < end_fsb) { |
1105 | truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), | ||
1106 | XFS_FSB_TO_B(mp, end_fsb) - 1); | ||
1107 | |||
1117 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1108 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1118 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, | 1109 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, |
1119 | end_fsb - start_fsb); | 1110 | end_fsb - start_fsb); |
@@ -1144,7 +1135,7 @@ xfs_file_iomap_end( | |||
1144 | return 0; | 1135 | return 0; |
1145 | } | 1136 | } |
1146 | 1137 | ||
1147 | struct iomap_ops xfs_iomap_ops = { | 1138 | const struct iomap_ops xfs_iomap_ops = { |
1148 | .iomap_begin = xfs_file_iomap_begin, | 1139 | .iomap_begin = xfs_file_iomap_begin, |
1149 | .iomap_end = xfs_file_iomap_end, | 1140 | .iomap_end = xfs_file_iomap_end, |
1150 | }; | 1141 | }; |
@@ -1190,6 +1181,6 @@ out_unlock: | |||
1190 | return error; | 1181 | return error; |
1191 | } | 1182 | } |
1192 | 1183 | ||
1193 | struct iomap_ops xfs_xattr_iomap_ops = { | 1184 | const struct iomap_ops xfs_xattr_iomap_ops = { |
1194 | .iomap_begin = xfs_xattr_iomap_begin, | 1185 | .iomap_begin = xfs_xattr_iomap_begin, |
1195 | }; | 1186 | }; |
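The short-write cleanup added to xfs_file_iomap_end_delalloc() hinges on two rounding directions: XFS_B_TO_FSBT truncates and XFS_B_TO_FSB rounds up. A small standalone sketch of the arithmetic, assuming a 4096-byte filesystem block and hypothetical helper names mirroring those macros:

#include <stdio.h>

#define BLKSZ 4096ULL

static unsigned long long b_to_fsbt(unsigned long long bytes)
{
    return bytes / BLKSZ;                   /* round down, like B_TO_FSBT */
}

static unsigned long long b_to_fsb(unsigned long long bytes)
{
    return (bytes + BLKSZ - 1) / BLKSZ;     /* round up, like B_TO_FSB */
}

int main(void)
{
    unsigned long long offset = 6144, length = 8192, written = 0;

    /* failed write: punch from the block containing the write's offset */
    unsigned long long start_fsb = written ? b_to_fsb(offset + written)
                                           : b_to_fsbt(offset);
    unsigned long long end_fsb = b_to_fsb(offset + length);

    printf("punch fsb range [%llu, %llu)\n", start_fsb, end_fsb);
    /* prints [1, 4): blocks 1..3 of the delalloc reservation are tossed */
    return 0;
}

With written == 0 the punch range starts at the block containing the failed write's offset, so even a write that made no progress tosses its entire delalloc reservation, which is exactly what the drop_writes knob exercises.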
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 6d45cf01fcff..00db3ecea084 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -33,7 +33,27 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, | |||
33 | struct xfs_bmbt_irec *); | 33 | struct xfs_bmbt_irec *); |
34 | xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); | 34 | xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); |
35 | 35 | ||
36 | extern struct iomap_ops xfs_iomap_ops; | 36 | static inline xfs_filblks_t |
37 | extern struct iomap_ops xfs_xattr_iomap_ops; | 37 | xfs_aligned_fsb_count( |
38 | xfs_fileoff_t offset_fsb, | ||
39 | xfs_filblks_t count_fsb, | ||
40 | xfs_extlen_t extsz) | ||
41 | { | ||
42 | if (extsz) { | ||
43 | xfs_extlen_t align; | ||
44 | |||
45 | align = do_mod(offset_fsb, extsz); | ||
46 | if (align) | ||
47 | count_fsb += align; | ||
48 | align = do_mod(count_fsb, extsz); | ||
49 | if (align) | ||
50 | count_fsb += extsz - align; | ||
51 | } | ||
52 | |||
53 | return count_fsb; | ||
54 | } | ||
55 | |||
56 | extern const struct iomap_ops xfs_iomap_ops; | ||
57 | extern const struct iomap_ops xfs_xattr_iomap_ops; | ||
38 | 58 | ||
39 | #endif /* __XFS_IOMAP_H__*/ | 59 | #endif /* __XFS_IOMAP_H__*/ |
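Since xfs_iomap.h now carries the alignment helper inline, a worked example makes its two adjustments concrete. This is a userspace restatement of the function above, with % standing in for do_mod():

#include <stdio.h>

typedef unsigned long long xfs_fileoff_t;
typedef unsigned long long xfs_filblks_t;
typedef unsigned int xfs_extlen_t;

static xfs_filblks_t
aligned_fsb_count(xfs_fileoff_t offset_fsb, xfs_filblks_t count_fsb,
                  xfs_extlen_t extsz)
{
    if (extsz) {
        xfs_extlen_t align;

        align = offset_fsb % extsz;     /* do_mod() in the kernel */
        if (align)
            count_fsb += align;         /* extend start down to alignment */
        align = count_fsb % extsz;
        if (align)
            count_fsb += extsz - align; /* extend end up to alignment */
    }
    return count_fsb;
}

int main(void)
{
    /* 10 blocks at offset 5 with a 4-block extent size hint: the
     * aligned range is [4, 16), so this prints 12 */
    printf("%llu\n", aligned_fsb_count(5, 10, 4));
    return 0;
}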
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index b5e71072fde5..cc5a9f1574e7 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -124,7 +124,6 @@ struct xlog_ticket; | |||
124 | struct xfs_log_item; | 124 | struct xfs_log_item; |
125 | struct xfs_item_ops; | 125 | struct xfs_item_ops; |
126 | struct xfs_trans; | 126 | struct xfs_trans; |
127 | struct xfs_log_callback; | ||
128 | 127 | ||
129 | xfs_lsn_t xfs_log_done(struct xfs_mount *mp, | 128 | xfs_lsn_t xfs_log_done(struct xfs_mount *mp, |
130 | struct xlog_ticket *ticket, | 129 | struct xlog_ticket *ticket, |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index a4ab192e1792..82f1cbcc4de1 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -30,6 +30,9 @@ | |||
30 | #include "xfs_trans_priv.h" | 30 | #include "xfs_trans_priv.h" |
31 | #include "xfs_log.h" | 31 | #include "xfs_log.h" |
32 | #include "xfs_log_priv.h" | 32 | #include "xfs_log_priv.h" |
33 | #include "xfs_trace.h" | ||
34 | |||
35 | struct workqueue_struct *xfs_discard_wq; | ||
33 | 36 | ||
34 | /* | 37 | /* |
35 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to | 38 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to |
@@ -491,6 +494,75 @@ xlog_cil_free_logvec( | |||
491 | } | 494 | } |
492 | } | 495 | } |
493 | 496 | ||
497 | static void | ||
498 | xlog_discard_endio_work( | ||
499 | struct work_struct *work) | ||
500 | { | ||
501 | struct xfs_cil_ctx *ctx = | ||
502 | container_of(work, struct xfs_cil_ctx, discard_endio_work); | ||
503 | struct xfs_mount *mp = ctx->cil->xc_log->l_mp; | ||
504 | |||
505 | xfs_extent_busy_clear(mp, &ctx->busy_extents, false); | ||
506 | kmem_free(ctx); | ||
507 | } | ||
508 | |||
509 | /* | ||
510 | * Queue up the actual completion to a thread to avoid IRQ-safe locking for | ||
511 | * pagb_lock. Note that we need an unbounded workqueue, otherwise we might | ||
512 | * get the execution delayed up to 30 seconds for weird reasons. | ||
513 | */ | ||
514 | static void | ||
515 | xlog_discard_endio( | ||
516 | struct bio *bio) | ||
517 | { | ||
518 | struct xfs_cil_ctx *ctx = bio->bi_private; | ||
519 | |||
520 | INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work); | ||
521 | queue_work(xfs_discard_wq, &ctx->discard_endio_work); | ||
522 | } | ||
523 | |||
524 | static void | ||
525 | xlog_discard_busy_extents( | ||
526 | struct xfs_mount *mp, | ||
527 | struct xfs_cil_ctx *ctx) | ||
528 | { | ||
529 | struct list_head *list = &ctx->busy_extents; | ||
530 | struct xfs_extent_busy *busyp; | ||
531 | struct bio *bio = NULL; | ||
532 | struct blk_plug plug; | ||
533 | int error = 0; | ||
534 | |||
535 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); | ||
536 | |||
537 | blk_start_plug(&plug); | ||
538 | list_for_each_entry(busyp, list, list) { | ||
539 | trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, | ||
540 | busyp->length); | ||
541 | |||
542 | error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, | ||
543 | XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), | ||
544 | XFS_FSB_TO_BB(mp, busyp->length), | ||
545 | GFP_NOFS, 0, &bio); | ||
546 | if (error && error != -EOPNOTSUPP) { | ||
547 | xfs_info(mp, | ||
548 | "discard failed for extent [0x%llx,%u], error %d", | ||
549 | (unsigned long long)busyp->bno, | ||
550 | busyp->length, | ||
551 | error); | ||
552 | break; | ||
553 | } | ||
554 | } | ||
555 | |||
556 | if (bio) { | ||
557 | bio->bi_private = ctx; | ||
558 | bio->bi_end_io = xlog_discard_endio; | ||
559 | submit_bio(bio); | ||
560 | } else { | ||
561 | xlog_discard_endio_work(&ctx->discard_endio_work); | ||
562 | } | ||
563 | blk_finish_plug(&plug); | ||
564 | } | ||
565 | |||
494 | /* | 566 | /* |
495 | * Mark all items committed and clear busy extents. We free the log vector | 567 | * Mark all items committed and clear busy extents. We free the log vector |
496 | * chains in a separate pass so that we unpin the log items as quickly as | 568 | * chains in a separate pass so that we unpin the log items as quickly as |
@@ -525,14 +597,10 @@ xlog_cil_committed( | |||
525 | 597 | ||
526 | xlog_cil_free_logvec(ctx->lv_chain); | 598 | xlog_cil_free_logvec(ctx->lv_chain); |
527 | 599 | ||
528 | if (!list_empty(&ctx->busy_extents)) { | 600 | if (!list_empty(&ctx->busy_extents)) |
529 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); | 601 | xlog_discard_busy_extents(mp, ctx); |
530 | 602 | else | |
531 | xfs_discard_extents(mp, &ctx->busy_extents); | 603 | kmem_free(ctx); |
532 | xfs_extent_busy_clear(mp, &ctx->busy_extents, false); | ||
533 | } | ||
534 | |||
535 | kmem_free(ctx); | ||
536 | } | 604 | } |
537 | 605 | ||
538 | /* | 606 | /* |
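The reason for the endio/workqueue split in the xfs_log_cil.c hunks is execution context: bio completion can run in interrupt context, where taking pagb_lock would force that lock to become IRQ-safe everywhere. A rough userspace analogue of the handoff, with pthreads standing in for the workqueue machinery and all names invented for illustration:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { struct ctx *next; int id; };

static struct ctx *queue;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t qcond = PTHREAD_COND_INITIALIZER;

static void discard_endio(struct ctx *c)   /* may run in "IRQ" context */
{
    pthread_mutex_lock(&qlock);            /* the kernel side avoids even */
    c->next = queue;                       /* this: queue_work() is safe  */
    queue = c;                             /* to call from interrupts     */
    pthread_cond_signal(&qcond);
    pthread_mutex_unlock(&qlock);
}

static void *worker(void *arg)             /* "process context" cleanup */
{
    pthread_mutex_lock(&qlock);
    while (!queue)
        pthread_cond_wait(&qcond, &qlock);
    struct ctx *c = queue;
    queue = c->next;
    pthread_mutex_unlock(&qlock);

    printf("clearing busy extents for ctx %d in process context\n", c->id);
    free(c);                               /* the kmem_free(ctx) step */
    return NULL;
}

int main(void)
{
    pthread_t t;
    struct ctx *c = malloc(sizeof(*c));

    c->id = 1;
    pthread_create(&t, NULL, worker, NULL);
    discard_endio(c);
    pthread_join(t, NULL);
    return 0;
}

Note also that __blkdev_issue_discard() chains the bios, so a single completion covers the whole chain: only the final bio gets the bi_end_io hook, which is why xlog_discard_busy_extents() sets it once after the loop.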
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2b6eec52178e..c2604a5366f2 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -257,6 +257,7 @@ struct xfs_cil_ctx { | |||
257 | struct xfs_log_vec *lv_chain; /* logvecs being pushed */ | 257 | struct xfs_log_vec *lv_chain; /* logvecs being pushed */ |
258 | struct xfs_log_callback log_cb; /* completion callback hook. */ | 258 | struct xfs_log_callback log_cb; /* completion callback hook. */ |
259 | struct list_head committing; /* ctx committing list */ | 259 | struct list_head committing; /* ctx committing list */ |
260 | struct work_struct discard_endio_work; | ||
260 | }; | 261 | }; |
261 | 262 | ||
262 | /* | 263 | /* |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9b9540db17a6..450bde68bb75 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "xfs_rmap_btree.h" | 45 | #include "xfs_rmap_btree.h" |
46 | #include "xfs_refcount_btree.h" | 46 | #include "xfs_refcount_btree.h" |
47 | #include "xfs_reflink.h" | 47 | #include "xfs_reflink.h" |
48 | #include "xfs_extent_busy.h" | ||
48 | 49 | ||
49 | 50 | ||
50 | static DEFINE_MUTEX(xfs_uuid_table_mutex); | 51 | static DEFINE_MUTEX(xfs_uuid_table_mutex); |
@@ -187,7 +188,7 @@ xfs_initialize_perag( | |||
187 | xfs_agnumber_t *maxagi) | 188 | xfs_agnumber_t *maxagi) |
188 | { | 189 | { |
189 | xfs_agnumber_t index; | 190 | xfs_agnumber_t index; |
190 | xfs_agnumber_t first_initialised = 0; | 191 | xfs_agnumber_t first_initialised = NULLAGNUMBER; |
191 | xfs_perag_t *pag; | 192 | xfs_perag_t *pag; |
192 | int error = -ENOMEM; | 193 | int error = -ENOMEM; |
193 | 194 | ||
@@ -202,22 +203,21 @@ xfs_initialize_perag( | |||
202 | xfs_perag_put(pag); | 203 | xfs_perag_put(pag); |
203 | continue; | 204 | continue; |
204 | } | 205 | } |
205 | if (!first_initialised) | ||
206 | first_initialised = index; | ||
207 | 206 | ||
208 | pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); | 207 | pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); |
209 | if (!pag) | 208 | if (!pag) |
210 | goto out_unwind; | 209 | goto out_unwind_new_pags; |
211 | pag->pag_agno = index; | 210 | pag->pag_agno = index; |
212 | pag->pag_mount = mp; | 211 | pag->pag_mount = mp; |
213 | spin_lock_init(&pag->pag_ici_lock); | 212 | spin_lock_init(&pag->pag_ici_lock); |
214 | mutex_init(&pag->pag_ici_reclaim_lock); | 213 | mutex_init(&pag->pag_ici_reclaim_lock); |
215 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 214 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
216 | if (xfs_buf_hash_init(pag)) | 215 | if (xfs_buf_hash_init(pag)) |
217 | goto out_unwind; | 216 | goto out_free_pag; |
217 | init_waitqueue_head(&pag->pagb_wait); | ||
218 | 218 | ||
219 | if (radix_tree_preload(GFP_NOFS)) | 219 | if (radix_tree_preload(GFP_NOFS)) |
220 | goto out_unwind; | 220 | goto out_hash_destroy; |
221 | 221 | ||
222 | spin_lock(&mp->m_perag_lock); | 222 | spin_lock(&mp->m_perag_lock); |
223 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { | 223 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { |
@@ -225,10 +225,13 @@ xfs_initialize_perag( | |||
225 | spin_unlock(&mp->m_perag_lock); | 225 | spin_unlock(&mp->m_perag_lock); |
226 | radix_tree_preload_end(); | 226 | radix_tree_preload_end(); |
227 | error = -EEXIST; | 227 | error = -EEXIST; |
228 | goto out_unwind; | 228 | goto out_hash_destroy; |
229 | } | 229 | } |
230 | spin_unlock(&mp->m_perag_lock); | 230 | spin_unlock(&mp->m_perag_lock); |
231 | radix_tree_preload_end(); | 231 | radix_tree_preload_end(); |
232 | /* first new pag is fully initialized */ | ||
233 | if (first_initialised == NULLAGNUMBER) | ||
234 | first_initialised = index; | ||
232 | } | 235 | } |
233 | 236 | ||
234 | index = xfs_set_inode_alloc(mp, agcount); | 237 | index = xfs_set_inode_alloc(mp, agcount); |
@@ -239,11 +242,16 @@ xfs_initialize_perag( | |||
239 | mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); | 242 | mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); |
240 | return 0; | 243 | return 0; |
241 | 244 | ||
242 | out_unwind: | 245 | out_hash_destroy: |
243 | xfs_buf_hash_destroy(pag); | 246 | xfs_buf_hash_destroy(pag); |
247 | out_free_pag: | ||
244 | kmem_free(pag); | 248 | kmem_free(pag); |
245 | for (; index > first_initialised; index--) { | 249 | out_unwind_new_pags: |
250 | /* unwind any prior newly initialized pags */ | ||
251 | for (index = first_initialised; index < agcount; index++) { | ||
246 | pag = radix_tree_delete(&mp->m_perag_tree, index); | 252 | pag = radix_tree_delete(&mp->m_perag_tree, index); |
253 | if (!pag) | ||
254 | break; | ||
247 | xfs_buf_hash_destroy(pag); | 255 | xfs_buf_hash_destroy(pag); |
248 | kmem_free(pag); | 256 | kmem_free(pag); |
249 | } | 257 | } |
@@ -1073,6 +1081,13 @@ xfs_unmountfs( | |||
1073 | xfs_log_force(mp, XFS_LOG_SYNC); | 1081 | xfs_log_force(mp, XFS_LOG_SYNC); |
1074 | 1082 | ||
1075 | /* | 1083 | /* |
1084 | * Wait for all busy extents to be freed, including completion of | ||
1085 | * any discard operation. | ||
1086 | */ | ||
1087 | xfs_extent_busy_wait_all(mp); | ||
1088 | flush_workqueue(xfs_discard_wq); | ||
1089 | |||
1090 | /* | ||
1076 | * We now need to tell the world we are unmounting. This will allow | 1091 | * We now need to tell the world we are unmounting. This will allow |
1077 | * us to detect that the filesystem is going away and we should error | 1092 | * us to detect that the filesystem is going away and we should error |
1078 | * out anything that we have been retrying in the background. This will | 1093 | * out anything that we have been retrying in the background. This will |
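The perag changes rework the unwind path: first_initialised now starts at NULLAGNUMBER, is only set once a pag has been fully inserted, and the unwind walks forward from there, stopping at the first missing entry. A compact sketch of that shape, with a plain pointer array standing in for the radix tree; names are illustrative, and the sketch assumes, as growfs does, that new entries sit past the pre-existing ones:

#include <stdio.h>
#include <stdlib.h>

#define NULLAGNUMBER (~0U)
#define AGCOUNT 8U

static void *pags[AGCOUNT];      /* NULL means "not present" */
static unsigned int fail_at = 6; /* inject an allocation failure */

static int init_perag(unsigned int agcount)
{
    unsigned int first_initialised = NULLAGNUMBER;
    unsigned int index;

    for (index = 0; index < agcount; index++) {
        if (pags[index])                /* pre-existing pag: skip it */
            continue;
        pags[index] = (index == fail_at) ? NULL : malloc(16);
        if (!pags[index])
            goto out_unwind_new_pags;
        /* only fully set-up pags are candidates for unwinding */
        if (first_initialised == NULLAGNUMBER)
            first_initialised = index;
    }
    return 0;

out_unwind_new_pags:
    /* walk forward from the first new pag; stop at the first gap */
    for (index = first_initialised; index < agcount; index++) {
        if (!pags[index])
            break;
        free(pags[index]);
        pags[index] = NULL;
    }
    return -12;                         /* -ENOMEM */
}

int main(void)
{
    pags[0] = malloc(16);               /* AGs that existed before the grow */
    pags[1] = malloc(16);
    printf("init_perag: %d (pre-existing pags intact: %s)\n",
           init_perag(AGCOUNT), (pags[0] && pags[1]) ? "yes" : "no");
    return 0;
}

If no new pag was created at all, first_initialised stays NULLAGNUMBER and the unwind loop's condition is false immediately, so nothing that was already there gets torn down.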
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7f351f706b7a..6db6fd6b82b0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -200,11 +200,12 @@ typedef struct xfs_mount { | |||
200 | /* | 200 | /* |
201 | * DEBUG mode instrumentation to test and/or trigger delayed allocation | 201 | * DEBUG mode instrumentation to test and/or trigger delayed allocation |
202 | * block killing in the event of failed writes. When enabled, all | 202 | * block killing in the event of failed writes. When enabled, all |
203 | * buffered writes are forced to fail. All delalloc blocks in the range | 203 | * buffered writes are silently dropped and handled as if they failed. |
204 | * of the write (including pre-existing delalloc blocks!) are tossed as | 204 | * All delalloc blocks in the range of the write (including pre-existing |
205 | * part of the write failure error handling sequence. | 205 | * delalloc blocks!) are tossed as part of the write failure error |
206 | * handling sequence. | ||
206 | */ | 207 | */ |
207 | bool m_fail_writes; | 208 | bool m_drop_writes; |
208 | #endif | 209 | #endif |
209 | } xfs_mount_t; | 210 | } xfs_mount_t; |
210 | 211 | ||
@@ -325,13 +326,13 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
325 | 326 | ||
326 | #ifdef DEBUG | 327 | #ifdef DEBUG |
327 | static inline bool | 328 | static inline bool |
328 | xfs_mp_fail_writes(struct xfs_mount *mp) | 329 | xfs_mp_drop_writes(struct xfs_mount *mp) |
329 | { | 330 | { |
330 | return mp->m_fail_writes; | 331 | return mp->m_drop_writes; |
331 | } | 332 | } |
332 | #else | 333 | #else |
333 | static inline bool | 334 | static inline bool |
334 | xfs_mp_fail_writes(struct xfs_mount *mp) | 335 | xfs_mp_drop_writes(struct xfs_mount *mp) |
335 | { | 336 | { |
336 | return 0; | 337 | return 0; |
337 | } | 338 | } |
@@ -384,6 +385,8 @@ typedef struct xfs_perag { | |||
384 | xfs_agino_t pagl_rightrec; | 385 | xfs_agino_t pagl_rightrec; |
385 | spinlock_t pagb_lock; /* lock for pagb_tree */ | 386 | spinlock_t pagb_lock; /* lock for pagb_tree */ |
386 | struct rb_root pagb_tree; /* ordered tree of busy extents */ | 387 | struct rb_root pagb_tree; /* ordered tree of busy extents */ |
388 | unsigned int pagb_gen; /* generation count for pagb_tree */ | ||
389 | wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */ | ||
387 | 390 | ||
388 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 391 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
389 | 392 | ||
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 07593a362cd0..da6d08fb359c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -82,11 +82,22 @@ | |||
82 | * mappings are a reservation against the free space in the filesystem; | 82 | * mappings are a reservation against the free space in the filesystem; |
83 | * adjacent mappings can also be combined into fewer larger mappings. | 83 | * adjacent mappings can also be combined into fewer larger mappings. |
84 | * | 84 | * |
85 | * As an optimization, the CoW extent size hint (cowextsz) creates | ||
86 | * outsized aligned delalloc reservations in the hope of landing out of | ||
87 | * order nearby CoW writes in a single extent on disk, thereby reducing | ||
88 | * fragmentation and improving future performance. | ||
89 | * | ||
90 | * D: --RRRRRRSSSRRRRRRRR--- (data fork) | ||
91 | * C: ------DDDDDDD--------- (CoW fork) | ||
92 | * | ||
85 | * When dirty pages are being written out (typically in writepage), the | 93 | * When dirty pages are being written out (typically in writepage), the |
86 | * delalloc reservations are converted into real mappings by allocating | 94 | * delalloc reservations are converted into unwritten mappings by |
87 | * blocks and replacing the delalloc mapping with real ones. A delalloc | 95 | * allocating blocks and replacing the delalloc mapping with real ones. |
88 | * mapping can be replaced by several real ones if the free space is | 96 | * A delalloc mapping can be replaced by several unwritten ones if the |
89 | * fragmented. | 97 | * free space is fragmented. |
98 | * | ||
99 | * D: --RRRRRRSSSRRRRRRRR--- | ||
100 | * C: ------UUUUUUU--------- | ||
90 | * | 101 | * |
91 | * We want to adapt the delalloc mechanism for copy-on-write, since the | 102 | * We want to adapt the delalloc mechanism for copy-on-write, since the |
92 | * write paths are similar. The first two steps (creating the reservation | 103 | * write paths are similar. The first two steps (creating the reservation |
@@ -101,13 +112,29 @@ | |||
101 | * Block-aligned directio writes will use the same mechanism as buffered | 112 | * Block-aligned directio writes will use the same mechanism as buffered |
102 | * writes. | 113 | * writes. |
103 | * | 114 | * |
115 | * Just prior to submitting the actual disk write requests, we convert | ||
116 | * the extents representing the range of the file actually being written | ||
117 | * (as opposed to extra pieces created for the cowextsize hint) to real | ||
118 | * extents. This will become important in the next step: | ||
119 | * | ||
120 | * D: --RRRRRRSSSRRRRRRRR--- | ||
121 | * C: ------UUrrUUU--------- | ||
122 | * | ||
104 | * CoW remapping must be done after the data block write completes, | 123 | * CoW remapping must be done after the data block write completes, |
105 | * because we don't want to destroy the old data fork map until we're sure | 124 | * because we don't want to destroy the old data fork map until we're sure |
106 | * the new block has been written. Since the new mappings are kept in a | 125 | * the new block has been written. Since the new mappings are kept in a |
107 | * separate fork, we can simply iterate these mappings to find the ones | 126 | * separate fork, we can simply iterate these mappings to find the ones |
108 | * that cover the file blocks that we just CoW'd. For each extent, simply | 127 | * that cover the file blocks that we just CoW'd. For each extent, simply |
109 | * unmap the corresponding range in the data fork, map the new range into | 128 | * unmap the corresponding range in the data fork, map the new range into |
110 | * the data fork, and remove the extent from the CoW fork. | 129 | * the data fork, and remove the extent from the CoW fork. Because of |
130 | * the presence of the cowextsize hint, however, we must be careful | ||
131 | * only to remap the blocks that we've actually written out -- we must | ||
132 | * never remap delalloc reservations nor CoW staging blocks that have | ||
133 | * yet to be written. This corresponds exactly to the real extents in | ||
134 | * the CoW fork: | ||
135 | * | ||
136 | * D: --RRRRRRrrSRRRRRRRR--- | ||
137 | * C: ------UU--UUU--------- | ||
111 | * | 138 | * |
112 | * Since the remapping operation can be applied to an arbitrary file | 139 | * Since the remapping operation can be applied to an arbitrary file |
113 | * range, we record the need for the remap step as a flag in the ioend | 140 | * range, we record the need for the remap step as a flag in the ioend |
@@ -296,103 +323,165 @@ xfs_reflink_reserve_cow( | |||
296 | return 0; | 323 | return 0; |
297 | } | 324 | } |
298 | 325 | ||
299 | /* Allocate all CoW reservations covering a range of blocks in a file. */ | 326 | /* Convert part of an unwritten CoW extent to a real one. */ |
300 | static int | 327 | STATIC int |
301 | __xfs_reflink_allocate_cow( | 328 | xfs_reflink_convert_cow_extent( |
302 | struct xfs_inode *ip, | 329 | struct xfs_inode *ip, |
303 | xfs_fileoff_t *offset_fsb, | 330 | struct xfs_bmbt_irec *imap, |
304 | xfs_fileoff_t end_fsb) | 331 | xfs_fileoff_t offset_fsb, |
332 | xfs_filblks_t count_fsb, | ||
333 | struct xfs_defer_ops *dfops) | ||
305 | { | 334 | { |
306 | struct xfs_mount *mp = ip->i_mount; | 335 | xfs_fsblock_t first_block; |
307 | struct xfs_bmbt_irec imap; | 336 | int nimaps = 1; |
308 | struct xfs_defer_ops dfops; | ||
309 | struct xfs_trans *tp; | ||
310 | xfs_fsblock_t first_block; | ||
311 | int nimaps = 1, error; | ||
312 | bool shared; | ||
313 | |||
314 | xfs_defer_init(&dfops, &first_block); | ||
315 | 337 | ||
316 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, | 338 | if (imap->br_state == XFS_EXT_NORM) |
317 | XFS_TRANS_RESERVE, &tp); | 339 | return 0; |
318 | if (error) | ||
319 | return error; | ||
320 | 340 | ||
321 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 341 | xfs_trim_extent(imap, offset_fsb, count_fsb); |
342 | trace_xfs_reflink_convert_cow(ip, imap); | ||
343 | if (imap->br_blockcount == 0) | ||
344 | return 0; | ||
345 | return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount, | ||
346 | XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, | ||
347 | 0, imap, &nimaps, dfops); | ||
348 | } | ||
322 | 349 | ||
323 | /* Read extent from the source file. */ | 350 | /* Convert all of the unwritten CoW extents in a file's range to real ones. */ |
324 | nimaps = 1; | 351 | int |
325 | error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, | 352 | xfs_reflink_convert_cow( |
326 | &imap, &nimaps, 0); | 353 | struct xfs_inode *ip, |
327 | if (error) | 354 | xfs_off_t offset, |
328 | goto out_unlock; | 355 | xfs_off_t count) |
329 | ASSERT(nimaps == 1); | 356 | { |
357 | struct xfs_bmbt_irec got; | ||
358 | struct xfs_defer_ops dfops; | ||
359 | struct xfs_mount *mp = ip->i_mount; | ||
360 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
361 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
362 | xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); | ||
363 | xfs_extnum_t idx; | ||
364 | bool found; | ||
365 | int error = 0; | ||
330 | 366 | ||
331 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); | 367 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
332 | if (error) | ||
333 | goto out_trans_cancel; | ||
334 | 368 | ||
335 | if (!shared) { | 369 | /* Convert all the extents to real from unwritten. */ |
336 | *offset_fsb = imap.br_startoff + imap.br_blockcount; | 370 | for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); |
337 | goto out_trans_cancel; | 371 | found && got.br_startoff < end_fsb; |
372 | found = xfs_iext_get_extent(ifp, ++idx, &got)) { | ||
373 | error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, | ||
374 | end_fsb - offset_fsb, &dfops); | ||
375 | if (error) | ||
376 | break; | ||
338 | } | 377 | } |
339 | 378 | ||
340 | xfs_trans_ijoin(tp, ip, 0); | 379 | /* Finish up. */ |
341 | error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, | ||
342 | XFS_BMAPI_COWFORK, &first_block, | ||
343 | XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), | ||
344 | &imap, &nimaps, &dfops); | ||
345 | if (error) | ||
346 | goto out_trans_cancel; | ||
347 | |||
348 | error = xfs_defer_finish(&tp, &dfops, NULL); | ||
349 | if (error) | ||
350 | goto out_trans_cancel; | ||
351 | |||
352 | error = xfs_trans_commit(tp); | ||
353 | |||
354 | *offset_fsb = imap.br_startoff + imap.br_blockcount; | ||
355 | out_unlock: | ||
356 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 380 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
357 | return error; | 381 | return error; |
358 | out_trans_cancel: | ||
359 | xfs_defer_cancel(&dfops); | ||
360 | xfs_trans_cancel(tp); | ||
361 | goto out_unlock; | ||
362 | } | 382 | } |
363 | 383 | ||
364 | /* Allocate all CoW reservations covering a part of a file. */ | 384 | /* Allocate all CoW reservations covering a range of blocks in a file. */ |
365 | int | 385 | int |
366 | xfs_reflink_allocate_cow_range( | 386 | xfs_reflink_allocate_cow( |
367 | struct xfs_inode *ip, | 387 | struct xfs_inode *ip, |
368 | xfs_off_t offset, | 388 | struct xfs_bmbt_irec *imap, |
369 | xfs_off_t count) | 389 | bool *shared, |
390 | uint *lockmode) | ||
370 | { | 391 | { |
371 | struct xfs_mount *mp = ip->i_mount; | 392 | struct xfs_mount *mp = ip->i_mount; |
372 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | 393 | xfs_fileoff_t offset_fsb = imap->br_startoff; |
373 | xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); | 394 | xfs_filblks_t count_fsb = imap->br_blockcount; |
374 | int error; | 395 | struct xfs_bmbt_irec got; |
396 | struct xfs_defer_ops dfops; | ||
397 | struct xfs_trans *tp = NULL; | ||
398 | xfs_fsblock_t first_block; | ||
399 | int nimaps, error = 0; | ||
400 | bool trimmed; | ||
401 | xfs_filblks_t resaligned; | ||
402 | xfs_extlen_t resblks = 0; | ||
403 | xfs_extnum_t idx; | ||
375 | 404 | ||
405 | retry: | ||
376 | ASSERT(xfs_is_reflink_inode(ip)); | 406 | ASSERT(xfs_is_reflink_inode(ip)); |
377 | 407 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); | |
378 | trace_xfs_reflink_allocate_cow_range(ip, offset, count); | ||
379 | 408 | ||
380 | /* | 409 | /* |
381 | * Make sure that the dquots are there. | 410 | * Even if the extent is not shared we might have a preallocation for |
411 | * it in the COW fork. If so use it. | ||
382 | */ | 412 | */ |
383 | error = xfs_qm_dqattach(ip, 0); | 413 | if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) && |
384 | if (error) | 414 | got.br_startoff <= offset_fsb) { |
385 | return error; | 415 | *shared = true; |
386 | 416 | ||
387 | while (offset_fsb < end_fsb) { | 417 | /* If we have a real allocation in the COW fork we're done. */ |
388 | error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); | 418 | if (!isnullstartblock(got.br_startblock)) { |
389 | if (error) { | 419 | xfs_trim_extent(&got, offset_fsb, count_fsb); |
390 | trace_xfs_reflink_allocate_cow_range_error(ip, error, | 420 | *imap = got; |
391 | _RET_IP_); | 421 | goto convert; |
392 | break; | ||
393 | } | 422 | } |
423 | |||
424 | xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); | ||
425 | } else { | ||
426 | error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); | ||
427 | if (error || !*shared) | ||
428 | goto out; | ||
429 | } | ||
430 | |||
431 | if (!tp) { | ||
432 | resaligned = xfs_aligned_fsb_count(imap->br_startoff, | ||
433 | imap->br_blockcount, xfs_get_cowextsz_hint(ip)); | ||
434 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); | ||
435 | |||
436 | xfs_iunlock(ip, *lockmode); | ||
437 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); | ||
438 | *lockmode = XFS_ILOCK_EXCL; | ||
439 | xfs_ilock(ip, *lockmode); | ||
440 | |||
441 | if (error) | ||
442 | return error; | ||
443 | |||
444 | error = xfs_qm_dqattach_locked(ip, 0); | ||
445 | if (error) | ||
446 | goto out; | ||
447 | goto retry; | ||
394 | } | 448 | } |
395 | 449 | ||
450 | error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, | ||
451 | XFS_QMOPT_RES_REGBLKS); | ||
452 | if (error) | ||
453 | goto out; | ||
454 | |||
455 | xfs_trans_ijoin(tp, ip, 0); | ||
456 | |||
457 | xfs_defer_init(&dfops, &first_block); | ||
458 | nimaps = 1; | ||
459 | |||
460 | /* Allocate the entire reservation as unwritten blocks. */ | ||
461 | error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, | ||
462 | XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, | ||
463 | resblks, imap, &nimaps, &dfops); | ||
464 | if (error) | ||
465 | goto out_bmap_cancel; | ||
466 | |||
467 | /* Finish up. */ | ||
468 | error = xfs_defer_finish(&tp, &dfops, NULL); | ||
469 | if (error) | ||
470 | goto out_bmap_cancel; | ||
471 | |||
472 | error = xfs_trans_commit(tp); | ||
473 | if (error) | ||
474 | return error; | ||
475 | convert: | ||
476 | return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, | ||
477 | &dfops); | ||
478 | out_bmap_cancel: | ||
479 | xfs_defer_cancel(&dfops); | ||
480 | xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, | ||
481 | XFS_QMOPT_RES_REGBLKS); | ||
482 | out: | ||
483 | if (tp) | ||
484 | xfs_trans_cancel(tp); | ||
396 | return error; | 485 | return error; |
397 | } | 486 | } |
398 | 487 | ||
@@ -641,6 +730,16 @@ xfs_reflink_end_cow( | |||
641 | 730 | ||
642 | ASSERT(!isnullstartblock(got.br_startblock)); | 731 | ASSERT(!isnullstartblock(got.br_startblock)); |
643 | 732 | ||
733 | /* | ||
734 | * Don't remap unwritten extents; these are | ||
735 | * speculatively preallocated CoW extents that have been | ||
736 | * allocated but have not yet been involved in a write. | ||
737 | */ | ||
738 | if (got.br_state == XFS_EXT_UNWRITTEN) { | ||
739 | idx--; | ||
740 | goto next_extent; | ||
741 | } | ||
742 | |||
644 | /* Unmap the old blocks in the data fork. */ | 743 | /* Unmap the old blocks in the data fork. */ |
645 | xfs_defer_init(&dfops, &firstfsb); | 744 | xfs_defer_init(&dfops, &firstfsb); |
646 | rlen = del.br_blockcount; | 745 | rlen = del.br_blockcount; |
@@ -855,13 +954,14 @@ STATIC int | |||
855 | xfs_reflink_update_dest( | 954 | xfs_reflink_update_dest( |
856 | struct xfs_inode *dest, | 955 | struct xfs_inode *dest, |
857 | xfs_off_t newlen, | 956 | xfs_off_t newlen, |
858 | xfs_extlen_t cowextsize) | 957 | xfs_extlen_t cowextsize, |
958 | bool is_dedupe) | ||
859 | { | 959 | { |
860 | struct xfs_mount *mp = dest->i_mount; | 960 | struct xfs_mount *mp = dest->i_mount; |
861 | struct xfs_trans *tp; | 961 | struct xfs_trans *tp; |
862 | int error; | 962 | int error; |
863 | 963 | ||
864 | if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) | 964 | if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) |
865 | return 0; | 965 | return 0; |
866 | 966 | ||
867 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); | 967 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); |
@@ -882,6 +982,10 @@ xfs_reflink_update_dest( | |||
882 | dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; | 982 | dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; |
883 | } | 983 | } |
884 | 984 | ||
985 | if (!is_dedupe) { | ||
986 | xfs_trans_ichgtime(tp, dest, | ||
987 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
988 | } | ||
885 | xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); | 989 | xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); |
886 | 990 | ||
887 | error = xfs_trans_commit(tp); | 991 | error = xfs_trans_commit(tp); |
@@ -1195,7 +1299,8 @@ xfs_reflink_remap_range( | |||
1195 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) | 1299 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) |
1196 | cowextsize = src->i_d.di_cowextsize; | 1300 | cowextsize = src->i_d.di_cowextsize; |
1197 | 1301 | ||
1198 | ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); | 1302 | ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, |
1303 | is_dedupe); | ||
1199 | 1304 | ||
1200 | out_unlock: | 1305 | out_unlock: |
1201 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); | 1306 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); |
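The xfs_reflink_end_cow() hunk is the enforcement point for the rule spelled out in the rewritten header comment: only real extents in the CoW fork get remapped into the data fork, while unwritten speculative preallocations are skipped. A toy model of that filter over the "UU rr UUU" picture from the comment, with purely illustrative structures:

#include <stdio.h>

enum ext_state { EXT_DELALLOC, EXT_UNWRITTEN, EXT_NORM };

struct extent {
    unsigned long long startoff, blockcount;
    enum ext_state state;
};

int main(void)
{
    /* CoW fork after writeback: UU rr UUU (cf. the diagrams above) */
    struct extent cow[] = {
        {  6, 2, EXT_UNWRITTEN },
        {  8, 2, EXT_NORM },
        { 10, 3, EXT_UNWRITTEN },
    };

    for (int i = 0; i < 3; i++) {
        if (cow[i].state == EXT_UNWRITTEN) {
            printf("skip  [%llu,+%llu): not yet written\n",
                   cow[i].startoff, cow[i].blockcount);
            continue;
        }
        printf("remap [%llu,+%llu) into the data fork\n",
               cow[i].startoff, cow[i].blockcount);
    }
    return 0;
}

Remapping an unwritten extent would publish blocks that never received the new data, so the cowextsize-hinted extra reservations must stay in the CoW fork until a write actually converts them.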
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index aa6a4d64bd35..33ac9b8db683 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h | |||
@@ -28,8 +28,10 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, | |||
28 | 28 | ||
29 | extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, | 29 | extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, |
30 | struct xfs_bmbt_irec *imap, bool *shared); | 30 | struct xfs_bmbt_irec *imap, bool *shared); |
31 | extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, | 31 | extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, |
32 | xfs_off_t offset, xfs_off_t count); | 32 | struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode); |
33 | extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, | ||
34 | xfs_off_t count); | ||
33 | extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, | 35 | extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, |
34 | struct xfs_bmbt_irec *imap); | 36 | struct xfs_bmbt_irec *imap); |
35 | extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, | 37 | extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 802bcc326d9f..c57aa7f18087 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -1093,7 +1093,6 @@ xfs_rtallocate_extent( | |||
1093 | xfs_extlen_t minlen, /* minimum length to allocate */ | 1093 | xfs_extlen_t minlen, /* minimum length to allocate */ |
1094 | xfs_extlen_t maxlen, /* maximum length to allocate */ | 1094 | xfs_extlen_t maxlen, /* maximum length to allocate */ |
1095 | xfs_extlen_t *len, /* out: actual length allocated */ | 1095 | xfs_extlen_t *len, /* out: actual length allocated */ |
1096 | xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ | ||
1097 | int wasdel, /* was a delayed allocation extent */ | 1096 | int wasdel, /* was a delayed allocation extent */ |
1098 | xfs_extlen_t prod, /* extent product factor */ | 1097 | xfs_extlen_t prod, /* extent product factor */ |
1099 | xfs_rtblock_t *rtblock) /* out: start block allocated */ | 1098 | xfs_rtblock_t *rtblock) /* out: start block allocated */ |
@@ -1123,27 +1122,16 @@ xfs_rtallocate_extent( | |||
1123 | } | 1122 | } |
1124 | } | 1123 | } |
1125 | 1124 | ||
1125 | retry: | ||
1126 | sumbp = NULL; | 1126 | sumbp = NULL; |
1127 | /* | 1127 | if (bno == 0) { |
1128 | * Allocate by size, or near another block, or exactly at some block. | ||
1129 | */ | ||
1130 | switch (type) { | ||
1131 | case XFS_ALLOCTYPE_ANY_AG: | ||
1132 | error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len, | 1128 | error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len, |
1133 | &sumbp, &sb, prod, &r); | 1129 | &sumbp, &sb, prod, &r); |
1134 | break; | 1130 | } else { |
1135 | case XFS_ALLOCTYPE_NEAR_BNO: | ||
1136 | error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen, | 1131 | error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen, |
1137 | len, &sumbp, &sb, prod, &r); | 1132 | len, &sumbp, &sb, prod, &r); |
1138 | break; | ||
1139 | case XFS_ALLOCTYPE_THIS_BNO: | ||
1140 | error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, | ||
1141 | len, &sumbp, &sb, prod, &r); | ||
1142 | break; | ||
1143 | default: | ||
1144 | error = -EIO; | ||
1145 | ASSERT(0); | ||
1146 | } | 1133 | } |
1134 | |||
1147 | if (error) | 1135 | if (error) |
1148 | return error; | 1136 | return error; |
1149 | 1137 | ||
@@ -1158,7 +1146,11 @@ xfs_rtallocate_extent( | |||
1158 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen); | 1146 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen); |
1159 | else | 1147 | else |
1160 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen); | 1148 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen); |
1149 | } else if (prod > 1) { | ||
1150 | prod = 1; | ||
1151 | goto retry; | ||
1161 | } | 1152 | } |
1153 | |||
1162 | *rtblock = r; | 1154 | *rtblock = r; |
1163 | return 0; | 1155 | return 0; |
1164 | } | 1156 | } |
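The rtalloc hunk replaces the allocation-type switch with a simple bno check and adds a fallback: if an aligned request (prod > 1) finds nothing, it retries once unaligned instead of giving up. The shape of that retry as a standalone sketch; the stub allocator and names are invented for illustration:

#include <stdio.h>

#define NULLRTBLOCK (~0ULL)

/* pretend allocator: only unaligned (prod == 1) requests succeed */
static unsigned long long try_alloc(unsigned int minlen, unsigned int prod)
{
    if (prod > 1)
        return NULLRTBLOCK;     /* no suitably aligned extent available */
    return 1024;                /* found an unaligned extent */
}

static int rtallocate(unsigned int minlen, unsigned int prod,
                      unsigned long long *rtblock)
{
    unsigned long long r;

retry:
    r = try_alloc(minlen, prod);
    if (r == NULLRTBLOCK && prod > 1) {
        prod = 1;               /* drop the alignment requirement */
        goto retry;
    }
    *rtblock = r;
    return r == NULLRTBLOCK ? -28 /* -ENOSPC */ : 0;
}

int main(void)
{
    unsigned long long rtb;
    int error = rtallocate(16, 4, &rtb);

    printf("error %d rtblock %llu\n", error, rtb);
    return 0;
}

An extent-size-aligned allocation is an optimization, not a correctness requirement, so returning ENOSPC while unaligned free space still exists would fail writes needlessly; the retry trades alignment for forward progress.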
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 355dd9e1cb64..51dd3c726608 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h | |||
@@ -40,7 +40,6 @@ xfs_rtallocate_extent( | |||
40 | xfs_extlen_t minlen, /* minimum length to allocate */ | 40 | xfs_extlen_t minlen, /* minimum length to allocate */ |
41 | xfs_extlen_t maxlen, /* maximum length to allocate */ | 41 | xfs_extlen_t maxlen, /* maximum length to allocate */ |
42 | xfs_extlen_t *len, /* out: actual length allocated */ | 42 | xfs_extlen_t *len, /* out: actual length allocated */ |
43 | xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ | ||
44 | int wasdel, /* was a delayed allocation extent */ | 43 | int wasdel, /* was a delayed allocation extent */ |
45 | xfs_extlen_t prod, /* extent product factor */ | 44 | xfs_extlen_t prod, /* extent product factor */ |
46 | xfs_rtblock_t *rtblock); /* out: start block allocated */ | 45 | xfs_rtblock_t *rtblock); /* out: start block allocated */ |
@@ -122,7 +121,7 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, | |||
122 | 121 | ||
123 | 122 | ||
124 | #else | 123 | #else |
125 | # define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS) | 124 | # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) |
126 | # define xfs_rtfree_extent(t,b,l) (ENOSYS) | 125 | # define xfs_rtfree_extent(t,b,l) (ENOSYS) |
127 | # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) | 126 | # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) |
128 | # define xfs_growfs_rt(mp,in) (ENOSYS) | 127 | # define xfs_growfs_rt(mp,in) (ENOSYS) |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index eecbaac08eba..890862f2447c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1956,12 +1956,20 @@ xfs_init_workqueues(void) | |||
1956 | if (!xfs_alloc_wq) | 1956 | if (!xfs_alloc_wq) |
1957 | return -ENOMEM; | 1957 | return -ENOMEM; |
1958 | 1958 | ||
1959 | xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0); | ||
1960 | if (!xfs_discard_wq) | ||
1961 | goto out_free_alloc_wq; | ||
1962 | |||
1959 | return 0; | 1963 | return 0; |
1964 | out_free_alloc_wq: | ||
1965 | destroy_workqueue(xfs_alloc_wq); | ||
1966 | return -ENOMEM; | ||
1960 | } | 1967 | } |
1961 | 1968 | ||
1962 | STATIC void | 1969 | STATIC void |
1963 | xfs_destroy_workqueues(void) | 1970 | xfs_destroy_workqueues(void) |
1964 | { | 1971 | { |
1972 | destroy_workqueue(xfs_discard_wq); | ||
1965 | destroy_workqueue(xfs_alloc_wq); | 1973 | destroy_workqueue(xfs_alloc_wq); |
1966 | } | 1974 | } |
1967 | 1975 | ||
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index b6418abd85ad..5f2f32408011 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h | |||
@@ -73,6 +73,8 @@ extern const struct quotactl_ops xfs_quotactl_operations; | |||
73 | 73 | ||
74 | extern void xfs_reinit_percpu_counters(struct xfs_mount *mp); | 74 | extern void xfs_reinit_percpu_counters(struct xfs_mount *mp); |
75 | 75 | ||
76 | extern struct workqueue_struct *xfs_discard_wq; | ||
77 | |||
76 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 78 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
77 | 79 | ||
78 | #endif /* __XFS_SUPER_H__ */ | 80 | #endif /* __XFS_SUPER_H__ */ |
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index de6195e38910..80ac15fb9638 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c | |||
@@ -93,7 +93,7 @@ to_mp(struct kobject *kobject) | |||
93 | #ifdef DEBUG | 93 | #ifdef DEBUG |
94 | 94 | ||
95 | STATIC ssize_t | 95 | STATIC ssize_t |
96 | fail_writes_store( | 96 | drop_writes_store( |
97 | struct kobject *kobject, | 97 | struct kobject *kobject, |
98 | const char *buf, | 98 | const char *buf, |
99 | size_t count) | 99 | size_t count) |
@@ -107,9 +107,9 @@ fail_writes_store( | |||
107 | return ret; | 107 | return ret; |
108 | 108 | ||
109 | if (val == 1) | 109 | if (val == 1) |
110 | mp->m_fail_writes = true; | 110 | mp->m_drop_writes = true; |
111 | else if (val == 0) | 111 | else if (val == 0) |
112 | mp->m_fail_writes = false; | 112 | mp->m_drop_writes = false; |
113 | else | 113 | else |
114 | return -EINVAL; | 114 | return -EINVAL; |
115 | 115 | ||
@@ -117,21 +117,21 @@ fail_writes_store( | |||
117 | } | 117 | } |
118 | 118 | ||
119 | STATIC ssize_t | 119 | STATIC ssize_t |
120 | fail_writes_show( | 120 | drop_writes_show( |
121 | struct kobject *kobject, | 121 | struct kobject *kobject, |
122 | char *buf) | 122 | char *buf) |
123 | { | 123 | { |
124 | struct xfs_mount *mp = to_mp(kobject); | 124 | struct xfs_mount *mp = to_mp(kobject); |
125 | 125 | ||
126 | return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0); | 126 | return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_drop_writes ? 1 : 0); |
127 | } | 127 | } |
128 | XFS_SYSFS_ATTR_RW(fail_writes); | 128 | XFS_SYSFS_ATTR_RW(drop_writes); |
129 | 129 | ||
130 | #endif /* DEBUG */ | 130 | #endif /* DEBUG */ |
131 | 131 | ||
132 | static struct attribute *xfs_mp_attrs[] = { | 132 | static struct attribute *xfs_mp_attrs[] = { |
133 | #ifdef DEBUG | 133 | #ifdef DEBUG |
134 | ATTR_LIST(fail_writes), | 134 | ATTR_LIST(drop_writes), |
135 | #endif | 135 | #endif |
136 | NULL, | 136 | NULL, |
137 | }; | 137 | }; |
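The sysfs rename from fail_writes to drop_writes is mechanical, but the store side's contract is worth noting: the knob accepts exactly "0" or "1" and rejects everything else with -EINVAL. A userspace sketch of that parsing, assuming strtol in place of the kernel's kstrtoint (illustrative only):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool drop_writes;

static int drop_writes_store(const char *buf)
{
    char *end;
    long val = strtol(buf, &end, 10);

    if (end == buf)
        return -22;             /* -EINVAL: no digits at all */
    if (val == 1)
        drop_writes = true;
    else if (val == 0)
        drop_writes = false;
    else
        return -22;             /* -EINVAL: only 0 and 1 are valid */
    return 0;
}

int main(void)
{
    printf("%d %d %d\n", drop_writes_store("1"),
           drop_writes_store("0"), drop_writes_store("2"));
    return 0;   /* prints: 0 0 -22 */
}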
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 69c5bcd9a51b..fb7555e73a62 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -2245,7 +2245,6 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); | |||
2245 | 2245 | ||
2246 | /* deferred ops */ | 2246 | /* deferred ops */ |
2247 | struct xfs_defer_pending; | 2247 | struct xfs_defer_pending; |
2248 | struct xfs_defer_intake; | ||
2249 | struct xfs_defer_ops; | 2248 | struct xfs_defer_ops; |
2250 | 2249 | ||
2251 | DECLARE_EVENT_CLASS(xfs_defer_class, | 2250 | DECLARE_EVENT_CLASS(xfs_defer_class, |
@@ -3089,6 +3088,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, | |||
3089 | __field(xfs_fileoff_t, lblk) | 3088 | __field(xfs_fileoff_t, lblk) |
3090 | __field(xfs_extlen_t, len) | 3089 | __field(xfs_extlen_t, len) |
3091 | __field(xfs_fsblock_t, pblk) | 3090 | __field(xfs_fsblock_t, pblk) |
3091 | __field(int, state) | ||
3092 | ), | 3092 | ), |
3093 | TP_fast_assign( | 3093 | TP_fast_assign( |
3094 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | 3094 | __entry->dev = VFS_I(ip)->i_sb->s_dev; |
@@ -3096,13 +3096,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, | |||
3096 | __entry->lblk = irec->br_startoff; | 3096 | __entry->lblk = irec->br_startoff; |
3097 | __entry->len = irec->br_blockcount; | 3097 | __entry->len = irec->br_blockcount; |
3098 | __entry->pblk = irec->br_startblock; | 3098 | __entry->pblk = irec->br_startblock; |
3099 | __entry->state = irec->br_state; | ||
3099 | ), | 3100 | ), |
3100 | TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu", | 3101 | TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d", |
3101 | MAJOR(__entry->dev), MINOR(__entry->dev), | 3102 | MAJOR(__entry->dev), MINOR(__entry->dev), |
3102 | __entry->ino, | 3103 | __entry->ino, |
3103 | __entry->lblk, | 3104 | __entry->lblk, |
3104 | __entry->len, | 3105 | __entry->len, |
3105 | __entry->pblk) | 3106 | __entry->pblk, |
3107 | __entry->state) | ||
3106 | ); | 3108 | ); |
3107 | #define DEFINE_INODE_IREC_EVENT(name) \ | 3109 | #define DEFINE_INODE_IREC_EVENT(name) \ |
3108 | DEFINE_EVENT(xfs_inode_irec_class, name, \ | 3110 | DEFINE_EVENT(xfs_inode_irec_class, name, \ |
@@ -3242,11 +3244,11 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); | |||
3242 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); | 3244 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); |
3243 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); | 3245 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); |
3244 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); | 3246 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); |
3247 | DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); | ||
3245 | 3248 | ||
3246 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow); | 3249 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow); |
3247 | DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); | ||
3248 | 3250 | ||
3249 | DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); | 3251 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); |
3250 | DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); | 3252 | DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); |
3251 | DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); | 3253 | DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); |
3252 | 3254 | ||
@@ -3254,7 +3256,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); | |||
3254 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); | 3256 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); |
3255 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); | 3257 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); |
3256 | 3258 | ||
3257 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); | ||
3258 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); | 3259 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); |
3259 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); | 3260 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); |
3260 | 3261 | ||
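The xfs_inode_irec_class change is a routine three-step tracepoint extension: declare the field in TP_STRUCT__entry(), fill it in TP_fast_assign(), and print it in TP_printk(). A hedged sketch of the same edit on a hypothetical event (the TRACE_EVENT() plumbing — TRACE_SYSTEM, CREATE_TRACE_POINTS and the trace-header boilerplate — is elided):

    TRACE_EVENT(example_extent,
        TP_PROTO(u64 lblk, u32 len, int state),
        TP_ARGS(lblk, len, state),
        TP_STRUCT__entry(
            __field(u64, lblk)
            __field(u32, len)
            __field(int, state)    /* 1. the newly declared field */
        ),
        TP_fast_assign(
            __entry->lblk = lblk;
            __entry->len = len;
            __entry->state = state;    /* 2. its assignment */
        ),
        TP_printk("lblk 0x%llx len 0x%x st %d",    /* 3. its format */
                  __entry->lblk, __entry->len, __entry->state)
    );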
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 61b7fbdd3ebd..1646f659b60f 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -32,7 +32,6 @@ struct xfs_mount; | |||
32 | struct xfs_trans; | 32 | struct xfs_trans; |
33 | struct xfs_trans_res; | 33 | struct xfs_trans_res; |
34 | struct xfs_dquot_acct; | 34 | struct xfs_dquot_acct; |
35 | struct xfs_busy_extent; | ||
36 | struct xfs_rud_log_item; | 35 | struct xfs_rud_log_item; |
37 | struct xfs_rui_log_item; | 36 | struct xfs_rui_log_item; |
38 | struct xfs_btree_cur; | 37 | struct xfs_btree_cur; |
diff --git a/include/linux/dax.h b/include/linux/dax.h index 24ad71173995..2983e52efd07 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h | |||
@@ -37,9 +37,9 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) | |||
37 | } | 37 | } |
38 | 38 | ||
39 | ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, | 39 | ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, |
40 | struct iomap_ops *ops); | 40 | const struct iomap_ops *ops); |
41 | int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | 41 | int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, |
42 | struct iomap_ops *ops); | 42 | const struct iomap_ops *ops); |
43 | int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); | 43 | int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); |
44 | int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); | 44 | int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); |
45 | int dax_invalidate_mapping_entry_sync(struct address_space *mapping, | 45 | int dax_invalidate_mapping_entry_sync(struct address_space *mapping, |
@@ -72,7 +72,7 @@ static inline unsigned int dax_radix_order(void *entry) | |||
72 | return 0; | 72 | return 0; |
73 | } | 73 | } |
74 | int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, | 74 | int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, |
75 | pmd_t *pmd, unsigned int flags, struct iomap_ops *ops); | 75 | pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops); |
76 | #else | 76 | #else |
77 | static inline unsigned int dax_radix_order(void *entry) | 77 | static inline unsigned int dax_radix_order(void *entry) |
78 | { | 78 | { |
@@ -80,7 +80,7 @@ static inline unsigned int dax_radix_order(void *entry) | |||
80 | } | 80 | } |
81 | static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, | 81 | static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, |
82 | unsigned long address, pmd_t *pmd, unsigned int flags, | 82 | unsigned long address, pmd_t *pmd, unsigned int flags, |
83 | struct iomap_ops *ops) | 83 | const struct iomap_ops *ops) |
84 | { | 84 | { |
85 | return VM_FAULT_FALLBACK; | 85 | return VM_FAULT_FALLBACK; |
86 | } | 86 | } |
diff --git a/include/linux/iomap.h b/include/linux/iomap.h index a4c94b86401e..891459caa278 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h | |||
@@ -72,17 +72,17 @@ struct iomap_ops { | |||
72 | }; | 72 | }; |
73 | 73 | ||
74 | ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, | 74 | ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, |
75 | struct iomap_ops *ops); | 75 | const struct iomap_ops *ops); |
76 | int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, | 76 | int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, |
77 | struct iomap_ops *ops); | 77 | const struct iomap_ops *ops); |
78 | int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, | 78 | int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, |
79 | bool *did_zero, struct iomap_ops *ops); | 79 | bool *did_zero, const struct iomap_ops *ops); |
80 | int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, | 80 | int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, |
81 | struct iomap_ops *ops); | 81 | const struct iomap_ops *ops); |
82 | int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | 82 | int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, |
83 | struct iomap_ops *ops); | 83 | const struct iomap_ops *ops); |
84 | int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 84 | int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
85 | loff_t start, loff_t len, struct iomap_ops *ops); | 85 | loff_t start, loff_t len, const struct iomap_ops *ops); |
86 | 86 | ||
87 | /* | 87 | /* |
88 | * Flags for direct I/O ->end_io: | 88 | * Flags for direct I/O ->end_io: |
@@ -92,6 +92,6 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
92 | typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, | 92 | typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, |
93 | unsigned flags); | 93 | unsigned flags); |
94 | ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | 94 | ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, |
95 | struct iomap_ops *ops, iomap_dio_end_io_t end_io); | 95 | const struct iomap_ops *ops, iomap_dio_end_io_t end_io); |
96 | 96 | ||
97 | #endif /* LINUX_IOMAP_H */ | 97 | #endif /* LINUX_IOMAP_H */ |
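The iomap and dax prototype changes above are pure constification: every entry point now takes const struct iomap_ops *, which lets filesystems declare their ops tables const so they land in .rodata. A hedged caller-side sketch — example_iomap_ops and example_zero() are hypothetical, iomap_zero_range() is one of the prototypes changed above:

    #include <linux/fs.h>
    #include <linux/iomap.h>

    /* With the const prototypes the table itself can be const,
     * i.e. read-only after link time. */
    static const struct iomap_ops example_iomap_ops = {
        /* .iomap_begin / .iomap_end callbacks elided */
    };

    static int example_zero(struct inode *inode, loff_t pos, loff_t len)
    {
        bool did_zero;

        return iomap_zero_range(inode, pos, len, &did_zero,
                                &example_iomap_ops);
    }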
diff --git a/include/linux/module.h b/include/linux/module.h index f4f542ed3d92..0297c5cd7cdf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
@@ -126,13 +126,13 @@ extern void cleanup_module(void); | |||
126 | 126 | ||
127 | /* Each module must use one module_init(). */ | 127 | /* Each module must use one module_init(). */ |
128 | #define module_init(initfn) \ | 128 | #define module_init(initfn) \ |
129 | static inline initcall_t __inittest(void) \ | 129 | static inline initcall_t __maybe_unused __inittest(void) \ |
130 | { return initfn; } \ | 130 | { return initfn; } \ |
131 | int init_module(void) __attribute__((alias(#initfn))); | 131 | int init_module(void) __attribute__((alias(#initfn))); |
132 | 132 | ||
133 | /* This is only required if you want to be unloadable. */ | 133 | /* This is only required if you want to be unloadable. */ |
134 | #define module_exit(exitfn) \ | 134 | #define module_exit(exitfn) \ |
135 | static inline exitcall_t __exittest(void) \ | 135 | static inline exitcall_t __maybe_unused __exittest(void) \ |
136 | { return exitfn; } \ | 136 | { return exitfn; } \ |
137 | void cleanup_module(void) __attribute__((alias(#exitfn))); | 137 | void cleanup_module(void) __attribute__((alias(#exitfn))); |
138 | 138 | ||
@@ -281,8 +281,6 @@ enum module_state { | |||
281 | MODULE_STATE_UNFORMED, /* Still setting it up. */ | 281 | MODULE_STATE_UNFORMED, /* Still setting it up. */ |
282 | }; | 282 | }; |
283 | 283 | ||
284 | struct module; | ||
285 | |||
286 | struct mod_tree_node { | 284 | struct mod_tree_node { |
287 | struct module *mod; | 285 | struct module *mod; |
288 | struct latch_tree_node node; | 286 | struct latch_tree_node node; |
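__inittest()/__exittest() are never called; they exist solely so the compiler verifies that the function handed to module_init()/module_exit() has the initcall/exitcall signature. Precisely because they are never called, W=1 builds warn about them — hence the __maybe_unused. A hedged standalone sketch of the same compile-time trick (plain C, no kernel headers; __maybe_unused expands to the attribute shown):

    typedef int (*initcall_t)(void);

    static int my_init(void) { return 0; }

    /* Never called: the return statement alone forces my_init to be
     * convertible to initcall_t, or the build fails. The unused
     * attribute keeps -Wunused-function quiet. */
    static inline initcall_t __attribute__((unused)) __check_my_init(void)
    {
        return my_init;
    }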
diff --git a/include/linux/printk.h b/include/linux/printk.h index 3472cc6b7a60..571257e0f53d 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h | |||
@@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { } | |||
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | #ifdef CONFIG_PRINTK_NMI | 149 | #ifdef CONFIG_PRINTK_NMI |
150 | extern void printk_nmi_init(void); | ||
151 | extern void printk_nmi_enter(void); | 150 | extern void printk_nmi_enter(void); |
152 | extern void printk_nmi_exit(void); | 151 | extern void printk_nmi_exit(void); |
153 | extern void printk_nmi_flush(void); | ||
154 | extern void printk_nmi_flush_on_panic(void); | ||
155 | #else | 152 | #else |
156 | static inline void printk_nmi_init(void) { } | ||
157 | static inline void printk_nmi_enter(void) { } | 153 | static inline void printk_nmi_enter(void) { } |
158 | static inline void printk_nmi_exit(void) { } | 154 | static inline void printk_nmi_exit(void) { } |
159 | static inline void printk_nmi_flush(void) { } | ||
160 | static inline void printk_nmi_flush_on_panic(void) { } | ||
161 | #endif /* PRINTK_NMI */ | 155 | #endif /* PRINTK_NMI */ |
162 | 156 | ||
163 | #ifdef CONFIG_PRINTK | 157 | #ifdef CONFIG_PRINTK |
@@ -209,6 +203,9 @@ void __init setup_log_buf(int early); | |||
209 | __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); | 203 | __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); |
210 | void dump_stack_print_info(const char *log_lvl); | 204 | void dump_stack_print_info(const char *log_lvl); |
211 | void show_regs_print_info(const char *log_lvl); | 205 | void show_regs_print_info(const char *log_lvl); |
206 | extern void printk_safe_init(void); | ||
207 | extern void printk_safe_flush(void); | ||
208 | extern void printk_safe_flush_on_panic(void); | ||
212 | #else | 209 | #else |
213 | static inline __printf(1, 0) | 210 | static inline __printf(1, 0) |
214 | int vprintk(const char *s, va_list args) | 211 | int vprintk(const char *s, va_list args) |
@@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl) | |||
268 | static inline void show_regs_print_info(const char *log_lvl) | 265 | static inline void show_regs_print_info(const char *log_lvl) |
269 | { | 266 | { |
270 | } | 267 | } |
268 | |||
269 | static inline void printk_safe_init(void) | ||
270 | { | ||
271 | } | ||
272 | |||
273 | static inline void printk_safe_flush(void) | ||
274 | { | ||
275 | } | ||
276 | |||
277 | static inline void printk_safe_flush_on_panic(void) | ||
278 | { | ||
279 | } | ||
271 | #endif | 280 | #endif |
272 | 281 | ||
273 | extern asmlinkage void dump_stack(void) __cold; | 282 | extern asmlinkage void dump_stack(void) __cold; |
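Moving the flush helpers under CONFIG_PRINTK keeps the usual header convention: real prototypes when the feature is configured, empty static inline stubs otherwise, so call sites stay free of #ifdefs and the compiler deletes the stub calls outright. A generic hedged sketch of the pattern (CONFIG_EXAMPLE and the function names are hypothetical):

    #ifdef CONFIG_EXAMPLE
    extern void example_init(void);
    extern void example_flush(void);
    #else
    /* No-op stubs: callers compile unchanged and the calls
     * vanish at -O2. */
    static inline void example_init(void) { }
    static inline void example_flush(void) { }
    #endif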
diff --git a/init/Kconfig b/init/Kconfig index 55bb6fbc294e..483ad679aa37 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -861,17 +861,19 @@ config LOG_CPU_MAX_BUF_SHIFT | |||
861 | 13 => 8 KB for each CPU | 861 | 13 => 8 KB for each CPU |
862 | 12 => 4 KB for each CPU | 862 | 12 => 4 KB for each CPU |
863 | 863 | ||
864 | config NMI_LOG_BUF_SHIFT | 864 | config PRINTK_SAFE_LOG_BUF_SHIFT |
865 | int "Temporary per-CPU NMI log buffer size (12 => 4KB, 13 => 8KB)" | 865 | int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)" |
866 | range 10 21 | 866 | range 10 21 |
867 | default 13 | 867 | default 13 |
868 | depends on PRINTK_NMI | 868 | depends on PRINTK |
869 | help | 869 | help |
870 | Select the size of a per-CPU buffer where NMI messages are temporary | 870 | Select the size of an alternate printk per-CPU buffer where messages |
871 | stored. They are copied to the main log buffer in a safe context | 871 | printed from usafe contexts are temporary stored. One example would |
872 | to avoid a deadlock. The value defines the size as a power of 2. | 872 | be NMI messages, another one - printk recursion. The messages are |
873 | copied to the main log buffer in a safe context to avoid a deadlock. | ||
874 | The value defines the size as a power of 2. | ||
873 | 875 | ||
874 | NMI messages are rare and limited. The largest one is when | 876 | Those messages are rare and limited. The largest one is when |
875 | a backtrace is printed. It usually fits into 4KB. Select | 877 | a backtrace is printed. It usually fits into 4KB. Select |
876 | 8KB if you want to be on the safe side. | 878 | 8KB if you want to be on the safe side. |
877 | 879 | ||
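The renamed option keeps the power-of-two sizing convention used by the neighbouring log-buffer options: a configured shift N yields a 2^N-byte per-CPU buffer. A quick hedged illustration of the arithmetic (names hypothetical; the real consumer subtracts some bookkeeping, as the printk_safe.c diff below shows):

    #define EXAMPLE_SHIFT   13                      /* the default */
    #define EXAMPLE_BUF_LEN (1 << EXAMPLE_SHIFT)    /* 8192 bytes */

    /* range 10..21 therefore spans 1 KB .. 2 MB per CPU */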
diff --git a/init/main.c b/init/main.c index c8a00f0f10ff..24ea48745061 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -581,7 +581,7 @@ asmlinkage __visible void __init start_kernel(void) | |||
581 | timekeeping_init(); | 581 | timekeeping_init(); |
582 | time_init(); | 582 | time_init(); |
583 | sched_clock_postinit(); | 583 | sched_clock_postinit(); |
584 | printk_nmi_init(); | 584 | printk_safe_init(); |
585 | perf_event_init(); | 585 | perf_event_init(); |
586 | profile_init(); | 586 | profile_init(); |
587 | call_function_init(); | 587 | call_function_init(); |
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index a01974e1bf6b..bfe62d5b3872 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c | |||
@@ -916,7 +916,7 @@ void crash_kexec(struct pt_regs *regs) | |||
916 | old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); | 916 | old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); |
917 | if (old_cpu == PANIC_CPU_INVALID) { | 917 | if (old_cpu == PANIC_CPU_INVALID) { |
918 | /* This is the 1st CPU which comes here, so go ahead. */ | 918 | /* This is the 1st CPU which comes here, so go ahead. */ |
919 | printk_nmi_flush_on_panic(); | 919 | printk_safe_flush_on_panic(); |
920 | __crash_kexec(regs); | 920 | __crash_kexec(regs); |
921 | 921 | ||
922 | /* | 922 | /* |
diff --git a/kernel/module.c b/kernel/module.c index a3889169a3ae..7eba6dea4f41 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2811,6 +2811,8 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) | |||
2811 | if (get_modinfo(info, "livepatch")) { | 2811 | if (get_modinfo(info, "livepatch")) { |
2812 | mod->klp = true; | 2812 | mod->klp = true; |
2813 | add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK); | 2813 | add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK); |
2814 | pr_notice_once("%s: tainting kernel with TAINT_LIVEPATCH\n", | ||
2815 | mod->name); | ||
2814 | } | 2816 | } |
2815 | 2817 | ||
2816 | return 0; | 2818 | return 0; |
@@ -3723,6 +3725,7 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3723 | mod_sysfs_teardown(mod); | 3725 | mod_sysfs_teardown(mod); |
3724 | coming_cleanup: | 3726 | coming_cleanup: |
3725 | mod->state = MODULE_STATE_GOING; | 3727 | mod->state = MODULE_STATE_GOING; |
3728 | destroy_params(mod->kp, mod->num_kp); | ||
3726 | blocking_notifier_call_chain(&module_notify_list, | 3729 | blocking_notifier_call_chain(&module_notify_list, |
3727 | MODULE_STATE_GOING, mod); | 3730 | MODULE_STATE_GOING, mod); |
3728 | klp_module_going(mod); | 3731 | klp_module_going(mod); |
@@ -4169,22 +4172,23 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
4169 | struct module *mod; | 4172 | struct module *mod; |
4170 | 4173 | ||
4171 | preempt_disable(); | 4174 | preempt_disable(); |
4172 | list_for_each_entry_rcu(mod, &modules, list) { | 4175 | mod = __module_address(addr); |
4173 | if (mod->state == MODULE_STATE_UNFORMED) | 4176 | if (!mod) |
4174 | continue; | 4177 | goto out; |
4175 | if (mod->num_exentries == 0) | ||
4176 | continue; | ||
4177 | 4178 | ||
4178 | e = search_extable(mod->extable, | 4179 | if (!mod->num_exentries) |
4179 | mod->extable + mod->num_exentries - 1, | 4180 | goto out; |
4180 | addr); | 4181 | |
4181 | if (e) | 4182 | e = search_extable(mod->extable, |
4182 | break; | 4183 | mod->extable + mod->num_exentries - 1, |
4183 | } | 4184 | addr); |
4185 | out: | ||
4184 | preempt_enable(); | 4186 | preempt_enable(); |
4185 | 4187 | ||
4186 | /* Now, if we found one, we are running inside it now, hence | 4188 | /* |
4187 | we cannot unload the module, hence no refcnt needed. */ | 4189 | * Now, if we found one, we are running inside it now, hence |
4190 | * we cannot unload the module, hence no refcnt needed. | ||
4191 | */ | ||
4188 | return e; | 4192 | return e; |
4189 | } | 4193 | } |
4190 | 4194 | ||
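The search_module_extables() rewrite drops the open-coded walk over the module list in favour of a single __module_address() lookup, which already skips MODULE_STATE_UNFORMED modules and is backed by the module address tree. A hedged standalone rendering of the resulting shape (example_search() is hypothetical; __module_address() and search_extable() are the real APIs the new code uses):

    #include <linux/extable.h>
    #include <linux/module.h>

    static const struct exception_table_entry *
    example_search(unsigned long addr)
    {
        const struct exception_table_entry *e = NULL;
        struct module *mod;

        preempt_disable();
        mod = __module_address(addr);    /* NULL if addr is not module text */
        if (mod && mod->num_exentries)
            e = search_extable(mod->extable,
                               mod->extable + mod->num_exentries - 1,
                               addr);
        preempt_enable();

        /* If found, we are executing inside that module right now,
         * so it cannot be unloaded under us: no refcount needed. */
        return e;
    }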
diff --git a/kernel/panic.c b/kernel/panic.c index 08aa88dde7de..b95959733ce0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -188,7 +188,7 @@ void panic(const char *fmt, ...) | |||
188 | * Bypass the panic_cpu check and call __crash_kexec directly. | 188 | * Bypass the panic_cpu check and call __crash_kexec directly. |
189 | */ | 189 | */ |
190 | if (!_crash_kexec_post_notifiers) { | 190 | if (!_crash_kexec_post_notifiers) { |
191 | printk_nmi_flush_on_panic(); | 191 | printk_safe_flush_on_panic(); |
192 | __crash_kexec(NULL); | 192 | __crash_kexec(NULL); |
193 | 193 | ||
194 | /* | 194 | /* |
@@ -213,7 +213,7 @@ void panic(const char *fmt, ...) | |||
213 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); | 213 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); |
214 | 214 | ||
215 | /* Call flush even twice. It tries harder with a single online CPU */ | 215 | /* Call flush even twice. It tries harder with a single online CPU */ |
216 | printk_nmi_flush_on_panic(); | 216 | printk_safe_flush_on_panic(); |
217 | kmsg_dump(KMSG_DUMP_PANIC); | 217 | kmsg_dump(KMSG_DUMP_PANIC); |
218 | 218 | ||
219 | /* | 219 | /* |
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index abb0042a427b..4a2ffc39eb95 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-y = printk.o | 1 | obj-y = printk.o |
2 | obj-$(CONFIG_PRINTK_NMI) += nmi.o | 2 | obj-$(CONFIG_PRINTK) += printk_safe.o |
3 | obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o | 3 | obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o |
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..1db044f808b7 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h | |||
@@ -16,42 +16,55 @@ | |||
16 | */ | 16 | */ |
17 | #include <linux/percpu.h> | 17 | #include <linux/percpu.h> |
18 | 18 | ||
19 | typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); | 19 | #ifdef CONFIG_PRINTK |
20 | 20 | ||
21 | int __printf(1, 0) vprintk_default(const char *fmt, va_list args); | 21 | #define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff |
22 | 22 | #define PRINTK_NMI_CONTEXT_MASK 0x80000000 | |
23 | #ifdef CONFIG_PRINTK_NMI | ||
24 | 23 | ||
25 | extern raw_spinlock_t logbuf_lock; | 24 | extern raw_spinlock_t logbuf_lock; |
26 | 25 | ||
26 | __printf(1, 0) int vprintk_default(const char *fmt, va_list args); | ||
27 | __printf(1, 0) int vprintk_func(const char *fmt, va_list args); | ||
28 | void __printk_safe_enter(void); | ||
29 | void __printk_safe_exit(void); | ||
30 | |||
31 | #define printk_safe_enter_irqsave(flags) \ | ||
32 | do { \ | ||
33 | local_irq_save(flags); \ | ||
34 | __printk_safe_enter(); \ | ||
35 | } while (0) | ||
36 | |||
37 | #define printk_safe_exit_irqrestore(flags) \ | ||
38 | do { \ | ||
39 | __printk_safe_exit(); \ | ||
40 | local_irq_restore(flags); \ | ||
41 | } while (0) | ||
42 | |||
43 | #define printk_safe_enter_irq() \ | ||
44 | do { \ | ||
45 | local_irq_disable(); \ | ||
46 | __printk_safe_enter(); \ | ||
47 | } while (0) | ||
48 | |||
49 | #define printk_safe_exit_irq() \ | ||
50 | do { \ | ||
51 | __printk_safe_exit(); \ | ||
52 | local_irq_enable(); \ | ||
53 | } while (0) | ||
54 | |||
55 | #else | ||
56 | |||
57 | __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } | ||
58 | |||
27 | /* | 59 | /* |
28 | * printk() could not take logbuf_lock in NMI context. Instead, | 60 | * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem |
29 | * it temporary stores the strings into a per-CPU buffer. | 61 | * semaphore and some of the console functions (console_unlock()/etc.), so |
30 | * The alternative implementation is chosen transparently | 62 | * printk-safe must preserve the existing local IRQ guarantees. |
31 | * via per-CPU variable. | ||
32 | */ | 63 | */ |
33 | DECLARE_PER_CPU(printk_func_t, printk_func); | 64 | #define printk_safe_enter_irqsave(flags) local_irq_save(flags) |
34 | static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) | 65 | #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) |
35 | { | 66 | |
36 | return this_cpu_read(printk_func)(fmt, args); | 67 | #define printk_safe_enter_irq() local_irq_disable() |
37 | } | 68 | #define printk_safe_exit_irq() local_irq_enable() |
38 | 69 | ||
39 | extern atomic_t nmi_message_lost; | 70 | #endif /* CONFIG_PRINTK */ |
40 | static inline int get_nmi_message_lost(void) | ||
41 | { | ||
42 | return atomic_xchg(&nmi_message_lost, 0); | ||
43 | } | ||
44 | |||
45 | #else /* CONFIG_PRINTK_NMI */ | ||
46 | |||
47 | static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) | ||
48 | { | ||
49 | return vprintk_default(fmt, args); | ||
50 | } | ||
51 | |||
52 | static inline int get_nmi_message_lost(void) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | #endif /* CONFIG_PRINTK_NMI */ | ||
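The enter/exit macros pair IRQ control with the per-CPU context flag that vprintk_func() uses to redirect any nested printk() into the safe buffer — in !PRINTK builds they degrade to bare IRQ toggles, as the #else branch shows. A hedged usage sketch (example_critical_section() is hypothetical; this only compiles inside kernel/printk/, where internal.h is visible):

    static void example_critical_section(void)
    {
        unsigned long flags;

        printk_safe_enter_irqsave(flags);
        /*
         * A printk() fired from here -- say by a WARN_ON() -- is
         * routed into the per-CPU safe buffer instead of recursing
         * into the locked logbuf/console paths.
         */
        printk_safe_exit_irqrestore(flags);
    }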
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4ba3d34938c0..34da86e73d00 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -213,17 +213,36 @@ static int nr_ext_console_drivers; | |||
213 | 213 | ||
214 | static int __down_trylock_console_sem(unsigned long ip) | 214 | static int __down_trylock_console_sem(unsigned long ip) |
215 | { | 215 | { |
216 | if (down_trylock(&console_sem)) | 216 | int lock_failed; |
217 | unsigned long flags; | ||
218 | |||
219 | /* | ||
220 | * Here and in __up_console_sem() we need to be in safe mode, | ||
221 | * because spindump/WARN/etc from under console->lock will | ||
222 | * deadlock in printk()->down_trylock_console_sem() otherwise. | ||
223 | */ | ||
224 | printk_safe_enter_irqsave(flags); | ||
225 | lock_failed = down_trylock(&console_sem); | ||
226 | printk_safe_exit_irqrestore(flags); | ||
227 | |||
228 | if (lock_failed) | ||
217 | return 1; | 229 | return 1; |
218 | mutex_acquire(&console_lock_dep_map, 0, 1, ip); | 230 | mutex_acquire(&console_lock_dep_map, 0, 1, ip); |
219 | return 0; | 231 | return 0; |
220 | } | 232 | } |
221 | #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) | 233 | #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) |
222 | 234 | ||
223 | #define up_console_sem() do { \ | 235 | static void __up_console_sem(unsigned long ip) |
224 | mutex_release(&console_lock_dep_map, 1, _RET_IP_);\ | 236 | { |
225 | up(&console_sem);\ | 237 | unsigned long flags; |
226 | } while (0) | 238 | |
239 | mutex_release(&console_lock_dep_map, 1, ip); | ||
240 | |||
241 | printk_safe_enter_irqsave(flags); | ||
242 | up(&console_sem); | ||
243 | printk_safe_exit_irqrestore(flags); | ||
244 | } | ||
245 | #define up_console_sem() __up_console_sem(_RET_IP_) | ||
227 | 246 | ||
228 | /* | 247 | /* |
229 | * This is used for debugging the mess that is the VT code by | 248 | * This is used for debugging the mess that is the VT code by |
@@ -351,6 +370,34 @@ __packed __aligned(4) | |||
351 | */ | 370 | */ |
352 | DEFINE_RAW_SPINLOCK(logbuf_lock); | 371 | DEFINE_RAW_SPINLOCK(logbuf_lock); |
353 | 372 | ||
373 | /* | ||
374 | * Helper macros to lock/unlock logbuf_lock and switch between | ||
375 | * printk-safe/unsafe modes. | ||
376 | */ | ||
377 | #define logbuf_lock_irq() \ | ||
378 | do { \ | ||
379 | printk_safe_enter_irq(); \ | ||
380 | raw_spin_lock(&logbuf_lock); \ | ||
381 | } while (0) | ||
382 | |||
383 | #define logbuf_unlock_irq() \ | ||
384 | do { \ | ||
385 | raw_spin_unlock(&logbuf_lock); \ | ||
386 | printk_safe_exit_irq(); \ | ||
387 | } while (0) | ||
388 | |||
389 | #define logbuf_lock_irqsave(flags) \ | ||
390 | do { \ | ||
391 | printk_safe_enter_irqsave(flags); \ | ||
392 | raw_spin_lock(&logbuf_lock); \ | ||
393 | } while (0) | ||
394 | |||
395 | #define logbuf_unlock_irqrestore(flags) \ | ||
396 | do { \ | ||
397 | raw_spin_unlock(&logbuf_lock); \ | ||
398 | printk_safe_exit_irqrestore(flags); \ | ||
399 | } while (0) | ||
400 | |||
354 | #ifdef CONFIG_PRINTK | 401 | #ifdef CONFIG_PRINTK |
355 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | 402 | DECLARE_WAIT_QUEUE_HEAD(log_wait); |
356 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | 403 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ |
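Bundling the lock and the mode switch into one macro means the two can never be paired incorrectly, which is the point: every logbuf_lock section is automatically a printk-safe section. A hedged sketch of a reader in the shape the devkmsg/syslog hunks below converge on (example_peek_next_seq() is hypothetical; log_next_seq is the real cursor guarded by logbuf_lock):

    static u64 example_peek_next_seq(void)
    {
        u64 seq;

        logbuf_lock_irq();
        seq = log_next_seq;    /* logbuf state is only valid under the lock */
        logbuf_unlock_irq();

        return seq;
    }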
@@ -782,20 +829,21 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, | |||
782 | ret = mutex_lock_interruptible(&user->lock); | 829 | ret = mutex_lock_interruptible(&user->lock); |
783 | if (ret) | 830 | if (ret) |
784 | return ret; | 831 | return ret; |
785 | raw_spin_lock_irq(&logbuf_lock); | 832 | |
833 | logbuf_lock_irq(); | ||
786 | while (user->seq == log_next_seq) { | 834 | while (user->seq == log_next_seq) { |
787 | if (file->f_flags & O_NONBLOCK) { | 835 | if (file->f_flags & O_NONBLOCK) { |
788 | ret = -EAGAIN; | 836 | ret = -EAGAIN; |
789 | raw_spin_unlock_irq(&logbuf_lock); | 837 | logbuf_unlock_irq(); |
790 | goto out; | 838 | goto out; |
791 | } | 839 | } |
792 | 840 | ||
793 | raw_spin_unlock_irq(&logbuf_lock); | 841 | logbuf_unlock_irq(); |
794 | ret = wait_event_interruptible(log_wait, | 842 | ret = wait_event_interruptible(log_wait, |
795 | user->seq != log_next_seq); | 843 | user->seq != log_next_seq); |
796 | if (ret) | 844 | if (ret) |
797 | goto out; | 845 | goto out; |
798 | raw_spin_lock_irq(&logbuf_lock); | 846 | logbuf_lock_irq(); |
799 | } | 847 | } |
800 | 848 | ||
801 | if (user->seq < log_first_seq) { | 849 | if (user->seq < log_first_seq) { |
@@ -803,7 +851,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, | |||
803 | user->idx = log_first_idx; | 851 | user->idx = log_first_idx; |
804 | user->seq = log_first_seq; | 852 | user->seq = log_first_seq; |
805 | ret = -EPIPE; | 853 | ret = -EPIPE; |
806 | raw_spin_unlock_irq(&logbuf_lock); | 854 | logbuf_unlock_irq(); |
807 | goto out; | 855 | goto out; |
808 | } | 856 | } |
809 | 857 | ||
@@ -816,7 +864,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, | |||
816 | 864 | ||
817 | user->idx = log_next(user->idx); | 865 | user->idx = log_next(user->idx); |
818 | user->seq++; | 866 | user->seq++; |
819 | raw_spin_unlock_irq(&logbuf_lock); | 867 | logbuf_unlock_irq(); |
820 | 868 | ||
821 | if (len > count) { | 869 | if (len > count) { |
822 | ret = -EINVAL; | 870 | ret = -EINVAL; |
@@ -843,7 +891,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) | |||
843 | if (offset) | 891 | if (offset) |
844 | return -ESPIPE; | 892 | return -ESPIPE; |
845 | 893 | ||
846 | raw_spin_lock_irq(&logbuf_lock); | 894 | logbuf_lock_irq(); |
847 | switch (whence) { | 895 | switch (whence) { |
848 | case SEEK_SET: | 896 | case SEEK_SET: |
849 | /* the first record */ | 897 | /* the first record */ |
@@ -867,7 +915,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) | |||
867 | default: | 915 | default: |
868 | ret = -EINVAL; | 916 | ret = -EINVAL; |
869 | } | 917 | } |
870 | raw_spin_unlock_irq(&logbuf_lock); | 918 | logbuf_unlock_irq(); |
871 | return ret; | 919 | return ret; |
872 | } | 920 | } |
873 | 921 | ||
@@ -881,7 +929,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) | |||
881 | 929 | ||
882 | poll_wait(file, &log_wait, wait); | 930 | poll_wait(file, &log_wait, wait); |
883 | 931 | ||
884 | raw_spin_lock_irq(&logbuf_lock); | 932 | logbuf_lock_irq(); |
885 | if (user->seq < log_next_seq) { | 933 | if (user->seq < log_next_seq) { |
886 | /* return error when data has vanished underneath us */ | 934 | /* return error when data has vanished underneath us */ |
887 | if (user->seq < log_first_seq) | 935 | if (user->seq < log_first_seq) |
@@ -889,7 +937,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) | |||
889 | else | 937 | else |
890 | ret = POLLIN|POLLRDNORM; | 938 | ret = POLLIN|POLLRDNORM; |
891 | } | 939 | } |
892 | raw_spin_unlock_irq(&logbuf_lock); | 940 | logbuf_unlock_irq(); |
893 | 941 | ||
894 | return ret; | 942 | return ret; |
895 | } | 943 | } |
@@ -919,10 +967,10 @@ static int devkmsg_open(struct inode *inode, struct file *file) | |||
919 | 967 | ||
920 | mutex_init(&user->lock); | 968 | mutex_init(&user->lock); |
921 | 969 | ||
922 | raw_spin_lock_irq(&logbuf_lock); | 970 | logbuf_lock_irq(); |
923 | user->idx = log_first_idx; | 971 | user->idx = log_first_idx; |
924 | user->seq = log_first_seq; | 972 | user->seq = log_first_seq; |
925 | raw_spin_unlock_irq(&logbuf_lock); | 973 | logbuf_unlock_irq(); |
926 | 974 | ||
927 | file->private_data = user; | 975 | file->private_data = user; |
928 | return 0; | 976 | return 0; |
@@ -1064,13 +1112,13 @@ void __init setup_log_buf(int early) | |||
1064 | return; | 1112 | return; |
1065 | } | 1113 | } |
1066 | 1114 | ||
1067 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 1115 | logbuf_lock_irqsave(flags); |
1068 | log_buf_len = new_log_buf_len; | 1116 | log_buf_len = new_log_buf_len; |
1069 | log_buf = new_log_buf; | 1117 | log_buf = new_log_buf; |
1070 | new_log_buf_len = 0; | 1118 | new_log_buf_len = 0; |
1071 | free = __LOG_BUF_LEN - log_next_idx; | 1119 | free = __LOG_BUF_LEN - log_next_idx; |
1072 | memcpy(log_buf, __log_buf, __LOG_BUF_LEN); | 1120 | memcpy(log_buf, __log_buf, __LOG_BUF_LEN); |
1073 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 1121 | logbuf_unlock_irqrestore(flags); |
1074 | 1122 | ||
1075 | pr_info("log_buf_len: %d bytes\n", log_buf_len); | 1123 | pr_info("log_buf_len: %d bytes\n", log_buf_len); |
1076 | pr_info("early log buf free: %d(%d%%)\n", | 1124 | pr_info("early log buf free: %d(%d%%)\n", |
@@ -1248,7 +1296,7 @@ static int syslog_print(char __user *buf, int size) | |||
1248 | size_t n; | 1296 | size_t n; |
1249 | size_t skip; | 1297 | size_t skip; |
1250 | 1298 | ||
1251 | raw_spin_lock_irq(&logbuf_lock); | 1299 | logbuf_lock_irq(); |
1252 | if (syslog_seq < log_first_seq) { | 1300 | if (syslog_seq < log_first_seq) { |
1253 | /* messages are gone, move to first one */ | 1301 | /* messages are gone, move to first one */ |
1254 | syslog_seq = log_first_seq; | 1302 | syslog_seq = log_first_seq; |
@@ -1256,7 +1304,7 @@ static int syslog_print(char __user *buf, int size) | |||
1256 | syslog_partial = 0; | 1304 | syslog_partial = 0; |
1257 | } | 1305 | } |
1258 | if (syslog_seq == log_next_seq) { | 1306 | if (syslog_seq == log_next_seq) { |
1259 | raw_spin_unlock_irq(&logbuf_lock); | 1307 | logbuf_unlock_irq(); |
1260 | break; | 1308 | break; |
1261 | } | 1309 | } |
1262 | 1310 | ||
@@ -1275,7 +1323,7 @@ static int syslog_print(char __user *buf, int size) | |||
1275 | syslog_partial += n; | 1323 | syslog_partial += n; |
1276 | } else | 1324 | } else |
1277 | n = 0; | 1325 | n = 0; |
1278 | raw_spin_unlock_irq(&logbuf_lock); | 1326 | logbuf_unlock_irq(); |
1279 | 1327 | ||
1280 | if (!n) | 1328 | if (!n) |
1281 | break; | 1329 | break; |
@@ -1304,7 +1352,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
1304 | if (!text) | 1352 | if (!text) |
1305 | return -ENOMEM; | 1353 | return -ENOMEM; |
1306 | 1354 | ||
1307 | raw_spin_lock_irq(&logbuf_lock); | 1355 | logbuf_lock_irq(); |
1308 | if (buf) { | 1356 | if (buf) { |
1309 | u64 next_seq; | 1357 | u64 next_seq; |
1310 | u64 seq; | 1358 | u64 seq; |
@@ -1352,12 +1400,12 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
1352 | idx = log_next(idx); | 1400 | idx = log_next(idx); |
1353 | seq++; | 1401 | seq++; |
1354 | 1402 | ||
1355 | raw_spin_unlock_irq(&logbuf_lock); | 1403 | logbuf_unlock_irq(); |
1356 | if (copy_to_user(buf + len, text, textlen)) | 1404 | if (copy_to_user(buf + len, text, textlen)) |
1357 | len = -EFAULT; | 1405 | len = -EFAULT; |
1358 | else | 1406 | else |
1359 | len += textlen; | 1407 | len += textlen; |
1360 | raw_spin_lock_irq(&logbuf_lock); | 1408 | logbuf_lock_irq(); |
1361 | 1409 | ||
1362 | if (seq < log_first_seq) { | 1410 | if (seq < log_first_seq) { |
1363 | /* messages are gone, move to next one */ | 1411 | /* messages are gone, move to next one */ |
@@ -1371,7 +1419,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
1371 | clear_seq = log_next_seq; | 1419 | clear_seq = log_next_seq; |
1372 | clear_idx = log_next_idx; | 1420 | clear_idx = log_next_idx; |
1373 | } | 1421 | } |
1374 | raw_spin_unlock_irq(&logbuf_lock); | 1422 | logbuf_unlock_irq(); |
1375 | 1423 | ||
1376 | kfree(text); | 1424 | kfree(text); |
1377 | return len; | 1425 | return len; |
@@ -1458,7 +1506,7 @@ int do_syslog(int type, char __user *buf, int len, int source) | |||
1458 | break; | 1506 | break; |
1459 | /* Number of chars in the log buffer */ | 1507 | /* Number of chars in the log buffer */ |
1460 | case SYSLOG_ACTION_SIZE_UNREAD: | 1508 | case SYSLOG_ACTION_SIZE_UNREAD: |
1461 | raw_spin_lock_irq(&logbuf_lock); | 1509 | logbuf_lock_irq(); |
1462 | if (syslog_seq < log_first_seq) { | 1510 | if (syslog_seq < log_first_seq) { |
1463 | /* messages are gone, move to first one */ | 1511 | /* messages are gone, move to first one */ |
1464 | syslog_seq = log_first_seq; | 1512 | syslog_seq = log_first_seq; |
@@ -1486,7 +1534,7 @@ int do_syslog(int type, char __user *buf, int len, int source) | |||
1486 | } | 1534 | } |
1487 | error -= syslog_partial; | 1535 | error -= syslog_partial; |
1488 | } | 1536 | } |
1489 | raw_spin_unlock_irq(&logbuf_lock); | 1537 | logbuf_unlock_irq(); |
1490 | break; | 1538 | break; |
1491 | /* Size of the log buffer */ | 1539 | /* Size of the log buffer */ |
1492 | case SYSLOG_ACTION_SIZE_BUFFER: | 1540 | case SYSLOG_ACTION_SIZE_BUFFER: |
@@ -1510,8 +1558,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | |||
1510 | * log_buf[start] to log_buf[end - 1]. | 1558 | * log_buf[start] to log_buf[end - 1]. |
1511 | * The console_lock must be held. | 1559 | * The console_lock must be held. |
1512 | */ | 1560 | */ |
1513 | static void call_console_drivers(int level, | 1561 | static void call_console_drivers(const char *ext_text, size_t ext_len, |
1514 | const char *ext_text, size_t ext_len, | ||
1515 | const char *text, size_t len) | 1562 | const char *text, size_t len) |
1516 | { | 1563 | { |
1517 | struct console *con; | 1564 | struct console *con; |
@@ -1538,28 +1585,6 @@ static void call_console_drivers(int level, | |||
1538 | } | 1585 | } |
1539 | } | 1586 | } |
1540 | 1587 | ||
1541 | /* | ||
1542 | * Zap console related locks when oopsing. | ||
1543 | * To leave time for slow consoles to print a full oops, | ||
1544 | * only zap at most once every 30 seconds. | ||
1545 | */ | ||
1546 | static void zap_locks(void) | ||
1547 | { | ||
1548 | static unsigned long oops_timestamp; | ||
1549 | |||
1550 | if (time_after_eq(jiffies, oops_timestamp) && | ||
1551 | !time_after(jiffies, oops_timestamp + 30 * HZ)) | ||
1552 | return; | ||
1553 | |||
1554 | oops_timestamp = jiffies; | ||
1555 | |||
1556 | debug_locks_off(); | ||
1557 | /* If a crash is occurring, make sure we can't deadlock */ | ||
1558 | raw_spin_lock_init(&logbuf_lock); | ||
1559 | /* And make sure that we print immediately */ | ||
1560 | sema_init(&console_sem, 1); | ||
1561 | } | ||
1562 | |||
1563 | int printk_delay_msec __read_mostly; | 1588 | int printk_delay_msec __read_mostly; |
1564 | 1589 | ||
1565 | static inline void printk_delay(void) | 1590 | static inline void printk_delay(void) |
@@ -1669,18 +1694,13 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1669 | const char *dict, size_t dictlen, | 1694 | const char *dict, size_t dictlen, |
1670 | const char *fmt, va_list args) | 1695 | const char *fmt, va_list args) |
1671 | { | 1696 | { |
1672 | static bool recursion_bug; | ||
1673 | static char textbuf[LOG_LINE_MAX]; | 1697 | static char textbuf[LOG_LINE_MAX]; |
1674 | char *text = textbuf; | 1698 | char *text = textbuf; |
1675 | size_t text_len = 0; | 1699 | size_t text_len = 0; |
1676 | enum log_flags lflags = 0; | 1700 | enum log_flags lflags = 0; |
1677 | unsigned long flags; | 1701 | unsigned long flags; |
1678 | int this_cpu; | ||
1679 | int printed_len = 0; | 1702 | int printed_len = 0; |
1680 | int nmi_message_lost; | ||
1681 | bool in_sched = false; | 1703 | bool in_sched = false; |
1682 | /* cpu currently holding logbuf_lock in this function */ | ||
1683 | static unsigned int logbuf_cpu = UINT_MAX; | ||
1684 | 1704 | ||
1685 | if (level == LOGLEVEL_SCHED) { | 1705 | if (level == LOGLEVEL_SCHED) { |
1686 | level = LOGLEVEL_DEFAULT; | 1706 | level = LOGLEVEL_DEFAULT; |
@@ -1690,53 +1710,8 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1690 | boot_delay_msec(level); | 1710 | boot_delay_msec(level); |
1691 | printk_delay(); | 1711 | printk_delay(); |
1692 | 1712 | ||
1693 | local_irq_save(flags); | ||
1694 | this_cpu = smp_processor_id(); | ||
1695 | |||
1696 | /* | ||
1697 | * Ouch, printk recursed into itself! | ||
1698 | */ | ||
1699 | if (unlikely(logbuf_cpu == this_cpu)) { | ||
1700 | /* | ||
1701 | * If a crash is occurring during printk() on this CPU, | ||
1702 | * then try to get the crash message out but make sure | ||
1703 | * we can't deadlock. Otherwise just return to avoid the | ||
1704 | * recursion and return - but flag the recursion so that | ||
1705 | * it can be printed at the next appropriate moment: | ||
1706 | */ | ||
1707 | if (!oops_in_progress && !lockdep_recursing(current)) { | ||
1708 | recursion_bug = true; | ||
1709 | local_irq_restore(flags); | ||
1710 | return 0; | ||
1711 | } | ||
1712 | zap_locks(); | ||
1713 | } | ||
1714 | |||
1715 | lockdep_off(); | ||
1716 | /* This stops the holder of console_sem just where we want him */ | 1713 | /* This stops the holder of console_sem just where we want him */ |
1717 | raw_spin_lock(&logbuf_lock); | 1714 | logbuf_lock_irqsave(flags); |
1718 | logbuf_cpu = this_cpu; | ||
1719 | |||
1720 | if (unlikely(recursion_bug)) { | ||
1721 | static const char recursion_msg[] = | ||
1722 | "BUG: recent printk recursion!"; | ||
1723 | |||
1724 | recursion_bug = false; | ||
1725 | /* emit KERN_CRIT message */ | ||
1726 | printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, | ||
1727 | NULL, 0, recursion_msg, | ||
1728 | strlen(recursion_msg)); | ||
1729 | } | ||
1730 | |||
1731 | nmi_message_lost = get_nmi_message_lost(); | ||
1732 | if (unlikely(nmi_message_lost)) { | ||
1733 | text_len = scnprintf(textbuf, sizeof(textbuf), | ||
1734 | "BAD LUCK: lost %d message(s) from NMI context!", | ||
1735 | nmi_message_lost); | ||
1736 | printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, | ||
1737 | NULL, 0, textbuf, text_len); | ||
1738 | } | ||
1739 | |||
1740 | /* | 1715 | /* |
1741 | * The printf needs to come first; we need the syslog | 1716 | * The printf needs to come first; we need the syslog |
1742 | * prefix which might be passed-in as a parameter. | 1717 | * prefix which might be passed-in as a parameter. |
@@ -1779,14 +1754,10 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1779 | 1754 | ||
1780 | printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); | 1755 | printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); |
1781 | 1756 | ||
1782 | logbuf_cpu = UINT_MAX; | 1757 | logbuf_unlock_irqrestore(flags); |
1783 | raw_spin_unlock(&logbuf_lock); | ||
1784 | lockdep_on(); | ||
1785 | local_irq_restore(flags); | ||
1786 | 1758 | ||
1787 | /* If called from the scheduler, we can not call up(). */ | 1759 | /* If called from the scheduler, we can not call up(). */ |
1788 | if (!in_sched) { | 1760 | if (!in_sched) { |
1789 | lockdep_off(); | ||
1790 | /* | 1761 | /* |
1791 | * Try to acquire and then immediately release the console | 1762 | * Try to acquire and then immediately release the console |
1792 | * semaphore. The release will print out buffers and wake up | 1763 | * semaphore. The release will print out buffers and wake up |
@@ -1794,7 +1765,6 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1794 | */ | 1765 | */ |
1795 | if (console_trylock()) | 1766 | if (console_trylock()) |
1796 | console_unlock(); | 1767 | console_unlock(); |
1797 | lockdep_on(); | ||
1798 | } | 1768 | } |
1799 | 1769 | ||
1800 | return printed_len; | 1770 | return printed_len; |
@@ -1803,7 +1773,7 @@ EXPORT_SYMBOL(vprintk_emit); | |||
1803 | 1773 | ||
1804 | asmlinkage int vprintk(const char *fmt, va_list args) | 1774 | asmlinkage int vprintk(const char *fmt, va_list args) |
1805 | { | 1775 | { |
1806 | return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); | 1776 | return vprintk_func(fmt, args); |
1807 | } | 1777 | } |
1808 | EXPORT_SYMBOL(vprintk); | 1778 | EXPORT_SYMBOL(vprintk); |
1809 | 1779 | ||
@@ -1895,16 +1865,12 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, | |||
1895 | static ssize_t msg_print_ext_body(char *buf, size_t size, | 1865 | static ssize_t msg_print_ext_body(char *buf, size_t size, |
1896 | char *dict, size_t dict_len, | 1866 | char *dict, size_t dict_len, |
1897 | char *text, size_t text_len) { return 0; } | 1867 | char *text, size_t text_len) { return 0; } |
1898 | static void call_console_drivers(int level, | 1868 | static void call_console_drivers(const char *ext_text, size_t ext_len, |
1899 | const char *ext_text, size_t ext_len, | ||
1900 | const char *text, size_t len) {} | 1869 | const char *text, size_t len) {} |
1901 | static size_t msg_print_text(const struct printk_log *msg, | 1870 | static size_t msg_print_text(const struct printk_log *msg, |
1902 | bool syslog, char *buf, size_t size) { return 0; } | 1871 | bool syslog, char *buf, size_t size) { return 0; } |
1903 | static bool suppress_message_printing(int level) { return false; } | 1872 | static bool suppress_message_printing(int level) { return false; } |
1904 | 1873 | ||
1905 | /* Still needs to be defined for users */ | ||
1906 | DEFINE_PER_CPU(printk_func_t, printk_func); | ||
1907 | |||
1908 | #endif /* CONFIG_PRINTK */ | 1874 | #endif /* CONFIG_PRINTK */ |
1909 | 1875 | ||
1910 | #ifdef CONFIG_EARLY_PRINTK | 1876 | #ifdef CONFIG_EARLY_PRINTK |
@@ -2220,9 +2186,9 @@ again: | |||
2220 | struct printk_log *msg; | 2186 | struct printk_log *msg; |
2221 | size_t ext_len = 0; | 2187 | size_t ext_len = 0; |
2222 | size_t len; | 2188 | size_t len; |
2223 | int level; | ||
2224 | 2189 | ||
2225 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2190 | printk_safe_enter_irqsave(flags); |
2191 | raw_spin_lock(&logbuf_lock); | ||
2226 | if (seen_seq != log_next_seq) { | 2192 | if (seen_seq != log_next_seq) { |
2227 | wake_klogd = true; | 2193 | wake_klogd = true; |
2228 | seen_seq = log_next_seq; | 2194 | seen_seq = log_next_seq; |
@@ -2243,8 +2209,7 @@ skip: | |||
2243 | break; | 2209 | break; |
2244 | 2210 | ||
2245 | msg = log_from_idx(console_idx); | 2211 | msg = log_from_idx(console_idx); |
2246 | level = msg->level; | 2212 | if (suppress_message_printing(msg->level)) { |
2247 | if (suppress_message_printing(level)) { | ||
2248 | /* | 2213 | /* |
2249 | * Skip record we have buffered and already printed | 2214 | * Skip record we have buffered and already printed |
2250 | * directly to the console when we received it, and | 2215 | * directly to the console when we received it, and |
@@ -2270,9 +2235,9 @@ skip: | |||
2270 | raw_spin_unlock(&logbuf_lock); | 2235 | raw_spin_unlock(&logbuf_lock); |
2271 | 2236 | ||
2272 | stop_critical_timings(); /* don't trace print latency */ | 2237 | stop_critical_timings(); /* don't trace print latency */ |
2273 | call_console_drivers(level, ext_text, ext_len, text, len); | 2238 | call_console_drivers(ext_text, ext_len, text, len); |
2274 | start_critical_timings(); | 2239 | start_critical_timings(); |
2275 | local_irq_restore(flags); | 2240 | printk_safe_exit_irqrestore(flags); |
2276 | 2241 | ||
2277 | if (do_cond_resched) | 2242 | if (do_cond_resched) |
2278 | cond_resched(); | 2243 | cond_resched(); |
@@ -2295,7 +2260,8 @@ skip: | |||
2295 | */ | 2260 | */ |
2296 | raw_spin_lock(&logbuf_lock); | 2261 | raw_spin_lock(&logbuf_lock); |
2297 | retry = console_seq != log_next_seq; | 2262 | retry = console_seq != log_next_seq; |
2298 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2263 | raw_spin_unlock(&logbuf_lock); |
2264 | printk_safe_exit_irqrestore(flags); | ||
2299 | 2265 | ||
2300 | if (retry && console_trylock()) | 2266 | if (retry && console_trylock()) |
2301 | goto again; | 2267 | goto again; |
@@ -2558,10 +2524,10 @@ void register_console(struct console *newcon) | |||
2558 | * console_unlock(); will print out the buffered messages | 2524 | * console_unlock(); will print out the buffered messages |
2559 | * for us. | 2525 | * for us. |
2560 | */ | 2526 | */ |
2561 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2527 | logbuf_lock_irqsave(flags); |
2562 | console_seq = syslog_seq; | 2528 | console_seq = syslog_seq; |
2563 | console_idx = syslog_idx; | 2529 | console_idx = syslog_idx; |
2564 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2530 | logbuf_unlock_irqrestore(flags); |
2565 | /* | 2531 | /* |
2566 | * We're about to replay the log buffer. Only do this to the | 2532 | * We're about to replay the log buffer. Only do this to the |
2567 | * just-registered console to avoid excessive message spam to | 2533 | * just-registered console to avoid excessive message spam to |
@@ -2860,12 +2826,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) | |||
2860 | /* initialize iterator with data about the stored records */ | 2826 | /* initialize iterator with data about the stored records */ |
2861 | dumper->active = true; | 2827 | dumper->active = true; |
2862 | 2828 | ||
2863 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2829 | logbuf_lock_irqsave(flags); |
2864 | dumper->cur_seq = clear_seq; | 2830 | dumper->cur_seq = clear_seq; |
2865 | dumper->cur_idx = clear_idx; | 2831 | dumper->cur_idx = clear_idx; |
2866 | dumper->next_seq = log_next_seq; | 2832 | dumper->next_seq = log_next_seq; |
2867 | dumper->next_idx = log_next_idx; | 2833 | dumper->next_idx = log_next_idx; |
2868 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2834 | logbuf_unlock_irqrestore(flags); |
2869 | 2835 | ||
2870 | /* invoke dumper which will iterate over records */ | 2836 | /* invoke dumper which will iterate over records */ |
2871 | dumper->dump(dumper, reason); | 2837 | dumper->dump(dumper, reason); |
@@ -2950,9 +2916,9 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | |||
2950 | unsigned long flags; | 2916 | unsigned long flags; |
2951 | bool ret; | 2917 | bool ret; |
2952 | 2918 | ||
2953 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2919 | logbuf_lock_irqsave(flags); |
2954 | ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); | 2920 | ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); |
2955 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2921 | logbuf_unlock_irqrestore(flags); |
2956 | 2922 | ||
2957 | return ret; | 2923 | return ret; |
2958 | } | 2924 | } |
@@ -2991,7 +2957,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, | |||
2991 | if (!dumper->active) | 2957 | if (!dumper->active) |
2992 | goto out; | 2958 | goto out; |
2993 | 2959 | ||
2994 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2960 | logbuf_lock_irqsave(flags); |
2995 | if (dumper->cur_seq < log_first_seq) { | 2961 | if (dumper->cur_seq < log_first_seq) { |
2996 | /* messages are gone, move to first available one */ | 2962 | /* messages are gone, move to first available one */ |
2997 | dumper->cur_seq = log_first_seq; | 2963 | dumper->cur_seq = log_first_seq; |
@@ -3000,7 +2966,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, | |||
3000 | 2966 | ||
3001 | /* last entry */ | 2967 | /* last entry */ |
3002 | if (dumper->cur_seq >= dumper->next_seq) { | 2968 | if (dumper->cur_seq >= dumper->next_seq) { |
3003 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2969 | logbuf_unlock_irqrestore(flags); |
3004 | goto out; | 2970 | goto out; |
3005 | } | 2971 | } |
3006 | 2972 | ||
@@ -3042,7 +3008,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, | |||
3042 | dumper->next_seq = next_seq; | 3008 | dumper->next_seq = next_seq; |
3043 | dumper->next_idx = next_idx; | 3009 | dumper->next_idx = next_idx; |
3044 | ret = true; | 3010 | ret = true; |
3045 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 3011 | logbuf_unlock_irqrestore(flags); |
3046 | out: | 3012 | out: |
3047 | if (len) | 3013 | if (len) |
3048 | *len = l; | 3014 | *len = l; |
@@ -3080,9 +3046,9 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) | |||
3080 | { | 3046 | { |
3081 | unsigned long flags; | 3047 | unsigned long flags; |
3082 | 3048 | ||
3083 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 3049 | logbuf_lock_irqsave(flags); |
3084 | kmsg_dump_rewind_nolock(dumper); | 3050 | kmsg_dump_rewind_nolock(dumper); |
3085 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 3051 | logbuf_unlock_irqrestore(flags); |
3086 | } | 3052 | } |
3087 | EXPORT_SYMBOL_GPL(kmsg_dump_rewind); | 3053 | EXPORT_SYMBOL_GPL(kmsg_dump_rewind); |
3088 | 3054 | ||
diff --git a/kernel/printk/nmi.c b/kernel/printk/printk_safe.c index f011aaef583c..033e50a7d706 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/printk_safe.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * nmi.c - Safe printk in NMI context | 2 | * printk_safe.c - Safe printk for printk-deadlock-prone contexts |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
@@ -32,36 +32,58 @@ | |||
32 | * is later flushed into the main ring buffer via IRQ work. | 32 | * is later flushed into the main ring buffer via IRQ work. |
33 | * | 33 | * |
34 | * The alternative implementation is chosen transparently | 34 | * The alternative implementation is chosen transparently |
35 | * via @printk_func per-CPU variable. | 35 | * by examining the current printk() context mask stored in the @printk_context |
36 | * per-CPU variable. | ||
36 | * | 37 | * |
37 | * The implementation also allows the strings to be flushed from another CPU. | 38 |
38 | * There are situations when we want to make sure that all buffers | 39 | * There are situations when we want to make sure that all buffers |
39 | * were handled or when IRQs are blocked. | 40 | * were handled or when IRQs are blocked. |
40 | */ | 41 | */ |
41 | DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; | 42 | static int printk_safe_irq_ready; |
42 | static int printk_nmi_irq_ready; | ||
43 | atomic_t nmi_message_lost; | ||
44 | 43 | ||
45 | #define NMI_LOG_BUF_LEN ((1 << CONFIG_NMI_LOG_BUF_SHIFT) - \ | 44 | #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ |
46 | sizeof(atomic_t) - sizeof(struct irq_work)) | 45 | sizeof(atomic_t) - \ |
46 | sizeof(atomic_t) - \ | ||
47 | sizeof(struct irq_work)) | ||
47 | 48 | ||
48 | struct nmi_seq_buf { | 49 | struct printk_safe_seq_buf { |
49 | atomic_t len; /* length of written data */ | 50 | atomic_t len; /* length of written data */ |
51 | atomic_t message_lost; | ||
50 | struct irq_work work; /* IRQ work that flushes the buffer */ | 52 | struct irq_work work; /* IRQ work that flushes the buffer */ |
51 | unsigned char buffer[NMI_LOG_BUF_LEN]; | 53 | unsigned char buffer[SAFE_LOG_BUF_LEN]; |
52 | }; | 54 | }; |
53 | static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); | 55 | |
56 | static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); | ||
57 | static DEFINE_PER_CPU(int, printk_context); | ||
58 | |||
59 | #ifdef CONFIG_PRINTK_NMI | ||
60 | static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); | ||
61 | #endif | ||
62 | |||
63 | /* Get flushed in a more safe context. */ | ||
64 | static void queue_flush_work(struct printk_safe_seq_buf *s) | ||
65 | { | ||
66 | if (printk_safe_irq_ready) { | ||
67 | /* Make sure that IRQ work is really initialized. */ | ||
68 | smp_rmb(); | ||
69 | irq_work_queue(&s->work); | ||
70 | } | ||
71 | } | ||
54 | 72 | ||
55 | /* | 73 | /* |
56 | * Safe printk() for NMI context. It uses a per-CPU buffer to | 74 | * Add a message to per-CPU context-dependent buffer. NMI and printk-safe |
57 | * store the message. NMIs are not nested, so there is always only | 75 | * have dedicated buffers, because otherwise printk-safe preempted by |
58 | * one writer running. But the buffer might get flushed from another | 76 | * NMI-printk would have overwritten the NMI messages. |
59 | * CPU, so we need to be careful. | 77 | * |
78 | * The messages are flushed from irq work (or from panic()), possibly |
79 | * from another CPU, concurrently with printk_safe_log_store(). Should this |
80 | * happen, printk_safe_log_store() will notice the buffer->len mismatch | ||
81 | * and repeat the write. | ||
60 | */ | 82 | */ |
61 | static int vprintk_nmi(const char *fmt, va_list args) | 83 | static int printk_safe_log_store(struct printk_safe_seq_buf *s, |
84 | const char *fmt, va_list args) | ||
62 | { | 85 | { |
63 | struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); | 86 | int add; |
64 | int add = 0; | ||
65 | size_t len; | 87 | size_t len; |
66 | 88 | ||
67 | again: | 89 | again: |
@@ -69,18 +91,21 @@ again: | |||
69 | 91 | ||
70 | /* The trailing '\0' is not counted into len. */ | 92 | /* The trailing '\0' is not counted into len. */ |
71 | if (len >= sizeof(s->buffer) - 1) { | 93 | if (len >= sizeof(s->buffer) - 1) { |
72 | atomic_inc(&nmi_message_lost); | 94 | atomic_inc(&s->message_lost); |
95 | queue_flush_work(s); | ||
73 | return 0; | 96 | return 0; |
74 | } | 97 | } |
75 | 98 | ||
76 | /* | 99 | /* |
77 | * Make sure that all old data have been read before the buffer was | 100 | * Make sure that all old data have been read before the buffer |
78 | * reseted. This is not needed when we just append data. | 101 | * was reset. This is not needed when we just append data. |
79 | */ | 102 | */ |
80 | if (!len) | 103 | if (!len) |
81 | smp_rmb(); | 104 | smp_rmb(); |
82 | 105 | ||
83 | add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); | 106 | add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); |
107 | if (!add) | ||
108 | return 0; | ||
84 | 109 | ||
85 | /* | 110 | /* |
86 | * Do it once again if the buffer has been flushed in the meantime. | 111 | * Do it once again if the buffer has been flushed in the meantime. |
@@ -90,32 +115,23 @@ again: | |||
90 | if (atomic_cmpxchg(&s->len, len, len + add) != len) | 115 | if (atomic_cmpxchg(&s->len, len, len + add) != len) |
91 | goto again; | 116 | goto again; |
92 | 117 | ||
93 | /* Get flushed in a more safe context. */ | 118 | queue_flush_work(s); |
94 | if (add && printk_nmi_irq_ready) { | ||
95 | /* Make sure that IRQ work is really initialized. */ | ||
96 | smp_rmb(); | ||
97 | irq_work_queue(&s->work); | ||
98 | } | ||
99 | |||
100 | return add; | 119 | return add; |
101 | } | 120 | } |
102 | 121 | ||
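The store path above is a lock-free append: read len, format past the end, then publish the new length with cmpxchg, retrying if a concurrent flush reset len in the meantime. A user-space sketch of the same loop with C11 atomics; the buffer size and names are illustrative:

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define BUF_LEN 256

static _Atomic int buf_len;   /* stands in for s->len */
static char buf[BUF_LEN];     /* stands in for s->buffer */

/* Append msg; redo the write if a concurrent flush reset buf_len. */
static int log_store(const char *msg)
{
	int len, add;

again:
	len = atomic_load(&buf_len);

	/* The trailing '\0' is not counted in len. */
	if (len + (int)strlen(msg) >= BUF_LEN - 1)
		return 0;   /* message lost */

	add = snprintf(buf + len, (size_t)(BUF_LEN - len), "%s", msg);

	/* Publish the new length; if len changed under us (the buffer
	 * was flushed), start over so nothing is lost. */
	if (!atomic_compare_exchange_strong(&buf_len, &len, len + add))
		goto again;

	return add;
}

int main(void)
{
	log_store("hello ");
	log_store("world\n");
	printf("%.*s", atomic_load(&buf_len), buf);
	return 0;
}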
103 | static void printk_nmi_flush_line(const char *text, int len) | 122 | static inline void printk_safe_flush_line(const char *text, int len) |
104 | { | 123 | { |
105 | /* | 124 | /* |
106 | * The buffers are flushed in NMI only on panic. The messages must | 125 | * Avoid any console drivers calls from here, because we may be |
107 | * go only into the ring buffer at this stage. Consoles will get | 126 | * in NMI or printk_safe context (when in panic). The messages |
108 | * explicitly called later when a crashdump is not generated. | 127 | * must go only into the ring buffer at this stage. Consoles will |
128 | * get explicitly called later when a crashdump is not generated. | ||
109 | */ | 129 | */ |
110 | if (in_nmi()) | 130 | printk_deferred("%.*s", len, text); |
111 | printk_deferred("%.*s", len, text); | ||
112 | else | ||
113 | printk("%.*s", len, text); | ||
114 | |||
115 | } | 131 | } |
116 | 132 | ||
117 | /* printk part of the temporary buffer line by line */ | 133 | /* printk part of the temporary buffer line by line */ |
118 | static int printk_nmi_flush_buffer(const char *start, size_t len) | 134 | static int printk_safe_flush_buffer(const char *start, size_t len) |
119 | { | 135 | { |
120 | const char *c, *end; | 136 | const char *c, *end; |
121 | bool header; | 137 | bool header; |
@@ -127,7 +143,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) | |||
127 | /* Print line by line. */ | 143 | /* Print line by line. */ |
128 | while (c < end) { | 144 | while (c < end) { |
129 | if (*c == '\n') { | 145 | if (*c == '\n') { |
130 | printk_nmi_flush_line(start, c - start + 1); | 146 | printk_safe_flush_line(start, c - start + 1); |
131 | start = ++c; | 147 | start = ++c; |
132 | header = true; | 148 | header = true; |
133 | continue; | 149 | continue; |
@@ -140,7 +156,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) | |||
140 | continue; | 156 | continue; |
141 | } | 157 | } |
142 | 158 | ||
143 | printk_nmi_flush_line(start, c - start); | 159 | printk_safe_flush_line(start, c - start); |
144 | start = c++; | 160 | start = c++; |
145 | header = true; | 161 | header = true; |
146 | continue; | 162 | continue; |
@@ -154,22 +170,31 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) | |||
154 | if (start < end && !header) { | 170 | if (start < end && !header) { |
155 | static const char newline[] = KERN_CONT "\n"; | 171 | static const char newline[] = KERN_CONT "\n"; |
156 | 172 | ||
157 | printk_nmi_flush_line(start, end - start); | 173 | printk_safe_flush_line(start, end - start); |
158 | printk_nmi_flush_line(newline, strlen(newline)); | 174 | printk_safe_flush_line(newline, strlen(newline)); |
159 | } | 175 | } |
160 | 176 | ||
161 | return len; | 177 | return len; |
162 | } | 178 | } |
163 | 179 | ||
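printk_safe_flush_buffer() hands the ring buffer one complete line per printk_deferred() call, so that log-level headers stay attached to their lines. A simplified user-space sketch of that splitting; the kernel's KERN_SOH header parsing is omitted here:

#include <stdio.h>
#include <string.h>

/* Emit one '\n'-terminated line at a time; an unterminated tail is
 * still flushed, with a newline appended, as in the hunk above. */
static size_t flush_buffer(const char *start, size_t len)
{
	const char *c = start, *end = start + len;

	while (c < end) {
		if (*c == '\n') {
			printf("%.*s", (int)(c - start + 1), start);
			start = ++c;
			continue;
		}
		c++;
	}

	if (start < end)
		printf("%.*s\n", (int)(end - start), start);

	return len;
}

int main(void)
{
	const char msg[] = "first line\nsecond line\ntail";

	flush_buffer(msg, strlen(msg));
	return 0;
}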
180 | static void report_message_lost(struct printk_safe_seq_buf *s) | ||
181 | { | ||
182 | int lost = atomic_xchg(&s->message_lost, 0); | ||
183 | |||
184 | if (lost) | ||
185 | printk_deferred("Lost %d message(s)!\n", lost); | ||
186 | } | ||
187 | |||
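report_message_lost() reads and clears the counter in a single atomic exchange, so two CPUs flushing concurrently can never report the same lost messages twice. The same read-and-clear idiom in user-space C11:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int message_lost;   /* stands in for s->message_lost */

static void report_message_lost(void)
{
	/* xchg: fetch the count and zero it in one step, so no other
	 * reporter can observe the same non-zero value. */
	int lost = atomic_exchange(&message_lost, 0);

	if (lost)
		printf("Lost %d message(s)!\n", lost);
}

int main(void)
{
	atomic_fetch_add(&message_lost, 3);
	report_message_lost();   /* prints once */
	report_message_lost();   /* counter already cleared; prints nothing */
	return 0;
}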
164 | /* | 188 | /* |
165 | * Flush data from the associated per_CPU buffer. The function | 189 | * Flush data from the associated per-CPU buffer. The function |
166 | * can be called either via IRQ work or independently. | 190 | * can be called either via IRQ work or independently. |
167 | */ | 191 | */ |
168 | static void __printk_nmi_flush(struct irq_work *work) | 192 | static void __printk_safe_flush(struct irq_work *work) |
169 | { | 193 | { |
170 | static raw_spinlock_t read_lock = | 194 | static raw_spinlock_t read_lock = |
171 | __RAW_SPIN_LOCK_INITIALIZER(read_lock); | 195 | __RAW_SPIN_LOCK_INITIALIZER(read_lock); |
172 | struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); | 196 | struct printk_safe_seq_buf *s = |
197 | container_of(work, struct printk_safe_seq_buf, work); | ||
173 | unsigned long flags; | 198 | unsigned long flags; |
174 | size_t len; | 199 | size_t len; |
175 | int i; | 200 | int i; |
@@ -194,9 +219,9 @@ more: | |||
194 | * buffer size. | 219 | * buffer size. |
195 | */ | 220 | */ |
196 | if ((i && i >= len) || len > sizeof(s->buffer)) { | 221 | if ((i && i >= len) || len > sizeof(s->buffer)) { |
197 | const char *msg = "printk_nmi_flush: internal error\n"; | 222 | const char *msg = "printk_safe_flush: internal error\n"; |
198 | 223 | ||
199 | printk_nmi_flush_line(msg, strlen(msg)); | 224 | printk_safe_flush_line(msg, strlen(msg)); |
200 | len = 0; | 225 | len = 0; |
201 | } | 226 | } |
202 | 227 | ||
@@ -205,7 +230,7 @@ more: | |||
205 | 230 | ||
206 | /* Make sure that data has been written up to the @len */ | 231 | /* Make sure that data has been written up to the @len */ |
207 | smp_rmb(); | 232 | smp_rmb(); |
208 | i += printk_nmi_flush_buffer(s->buffer + i, len - i); | 233 | i += printk_safe_flush_buffer(s->buffer + i, len - i); |
209 | 234 | ||
210 | /* | 235 | /* |
211 | * Check that nothing has been added in the meantime and truncate | 236 |
@@ -217,35 +242,40 @@ more: | |||
217 | goto more; | 242 | goto more; |
218 | 243 | ||
219 | out: | 244 | out: |
245 | report_message_lost(s); | ||
220 | raw_spin_unlock_irqrestore(&read_lock, flags); | 246 | raw_spin_unlock_irqrestore(&read_lock, flags); |
221 | } | 247 | } |
222 | 248 | ||
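The flush side is serialized against other flushers by read_lock, but it still races with writers: it prints up to the length it saw, then truncates with a cmpxchg of its own and loops if anything was appended meanwhile. A user-space sketch of that reader loop, the counterpart of the log_store() sketch earlier; the lock is elided and a single reader is assumed:

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define BUF_LEN 256

static _Atomic int buf_len;
static char buf[BUF_LEN];

static void flush(void)
{
	int len, i = 0;

more:
	len = atomic_load(&buf_len);
	if (!len)
		return;

	/* Print whatever arrived since the last pass. */
	printf("%.*s", len - i, buf + i);
	i = len;

	/* Truncate only if no writer appended meanwhile; otherwise go
	 * around again and print the new tail as well. */
	if (!atomic_compare_exchange_strong(&buf_len, &len, 0))
		goto more;
}

int main(void)
{
	const char msg[] = "buffered message\n";

	memcpy(buf, msg, sizeof(msg) - 1);
	atomic_store(&buf_len, (int)(sizeof(msg) - 1));
	flush();
	return 0;
}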
223 | /** | 249 | /** |
224 | * printk_nmi_flush - flush all per-cpu nmi buffers. | 250 | * printk_safe_flush - flush all per-cpu message buffers. |
225 | * | 251 | * |
226 | * The buffers are flushed automatically via IRQ work. This function | 252 | * The buffers are flushed automatically via IRQ work. This function |
227 | * is useful only when someone wants to be sure that all buffers have | 253 | * is useful only when someone wants to be sure that all buffers have |
228 | * been flushed at some point. | 254 | * been flushed at some point. |
229 | */ | 255 | */ |
230 | void printk_nmi_flush(void) | 256 | void printk_safe_flush(void) |
231 | { | 257 | { |
232 | int cpu; | 258 | int cpu; |
233 | 259 | ||
234 | for_each_possible_cpu(cpu) | 260 | for_each_possible_cpu(cpu) { |
235 | __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work); | 261 | #ifdef CONFIG_PRINTK_NMI |
262 | __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); | ||
263 | #endif | ||
264 | __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); | ||
265 | } | ||
236 | } | 266 | } |
237 | 267 | ||
238 | /** | 268 | /** |
239 | * printk_nmi_flush_on_panic - flush all per-cpu nmi buffers when the system | 269 | * printk_safe_flush_on_panic - flush all per-cpu message buffers when the system |
240 | * goes down. | 270 | * goes down. |
241 | * | 271 | * |
242 | * Similar to printk_nmi_flush() but it can be called even in NMI context when | 272 | * Similar to printk_safe_flush() but it can be called even in NMI context when |
243 | * the system goes down. It does the best effort to get NMI messages into | 273 | * the system goes down. It does the best effort to get NMI messages into |
244 | * the main ring buffer. | 274 | * the main ring buffer. |
245 | * | 275 | * |
246 | * Note that it could try harder when there is only one CPU online. | 276 | * Note that it could try harder when there is only one CPU online. |
247 | */ | 277 | */ |
248 | void printk_nmi_flush_on_panic(void) | 278 | void printk_safe_flush_on_panic(void) |
249 | { | 279 | { |
250 | /* | 280 | /* |
251 | * Make sure that we could access the main ring buffer. | 281 | * Make sure that we could access the main ring buffer. |
@@ -259,33 +289,97 @@ void printk_nmi_flush_on_panic(void) | |||
259 | raw_spin_lock_init(&logbuf_lock); | 289 | raw_spin_lock_init(&logbuf_lock); |
260 | } | 290 | } |
261 | 291 | ||
262 | printk_nmi_flush(); | 292 | printk_safe_flush(); |
263 | } | 293 | } |
264 | 294 | ||
265 | void __init printk_nmi_init(void) | 295 | #ifdef CONFIG_PRINTK_NMI |
296 | /* | ||
297 | * Safe printk() for NMI context. It uses a per-CPU buffer to | ||
298 | * store the message. NMIs are not nested, so there is always only | ||
299 | * one writer running. But the buffer might get flushed from another | ||
300 | * CPU, so we need to be careful. | ||
301 | */ | ||
302 | static int vprintk_nmi(const char *fmt, va_list args) | ||
266 | { | 303 | { |
267 | int cpu; | 304 | struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); |
268 | 305 | ||
269 | for_each_possible_cpu(cpu) { | 306 | return printk_safe_log_store(s, fmt, args); |
270 | struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); | 307 | } |
271 | 308 | ||
272 | init_irq_work(&s->work, __printk_nmi_flush); | 309 | void printk_nmi_enter(void) |
273 | } | 310 | { |
311 | this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); | ||
312 | } | ||
274 | 313 | ||
275 | /* Make sure that IRQ works are initialized before enabling. */ | 314 | void printk_nmi_exit(void) |
276 | smp_wmb(); | 315 | { |
277 | printk_nmi_irq_ready = 1; | 316 | this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); |
317 | } | ||
278 | 318 | ||
279 | /* Flush pending messages that did not have scheduled IRQ works. */ | 319 | #else |
280 | printk_nmi_flush(); | 320 | |
321 | static int vprintk_nmi(const char *fmt, va_list args) | ||
322 | { | ||
323 | return 0; | ||
281 | } | 324 | } |
282 | 325 | ||
283 | void printk_nmi_enter(void) | 326 | #endif /* CONFIG_PRINTK_NMI */ |
327 | |||
328 | /* | ||
329 | * Lock-less printk(), to avoid deadlocks should printk() recurse | ||
330 | * into itself. It uses a per-CPU buffer to store the message, just like | ||
331 | * NMI. | ||
332 | */ | ||
333 | static int vprintk_safe(const char *fmt, va_list args) | ||
284 | { | 334 | { |
285 | this_cpu_write(printk_func, vprintk_nmi); | 335 | struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); |
336 | |||
337 | return printk_safe_log_store(s, fmt, args); | ||
286 | } | 338 | } |
287 | 339 | ||
288 | void printk_nmi_exit(void) | 340 | /* Can be preempted by NMI. */ |
341 | void __printk_safe_enter(void) | ||
342 | { | ||
343 | this_cpu_inc(printk_context); | ||
344 | } | ||
345 | |||
346 | /* Can be preempted by NMI. */ | ||
347 | void __printk_safe_exit(void) | ||
289 | { | 348 | { |
290 | this_cpu_write(printk_func, vprintk_default); | 349 | this_cpu_dec(printk_context); |
350 | } | ||
351 | |||
352 | __printf(1, 0) int vprintk_func(const char *fmt, va_list args) | ||
353 | { | ||
354 | if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) | ||
355 | return vprintk_nmi(fmt, args); | ||
356 | |||
357 | if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) | ||
358 | return vprintk_safe(fmt, args); | ||
359 | |||
360 | return vprintk_default(fmt, args); | ||
361 | } | ||
362 | |||
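vprintk_func() becomes the single dispatch point that replaces the old per-CPU printk_func pointer: NMI context takes precedence, then printk-safe, and everything else falls through to vprintk_default(). A hedged sketch of the caller side; the real hookup is in kernel/printk/printk.c, outside this hunk, and the stub below merely stands in for the kernel dispatcher:

#include <stdarg.h>
#include <stdio.h>

/* Stub standing in for the kernel's context-aware dispatcher. */
static int vprintk_func(const char *fmt, va_list args)
{
	return vprintf(fmt, args);   /* vprintk_default() stand-in */
}

/* Assumed caller shape: printk() just forwards its varargs and lets
 * vprintk_func() pick the backend from printk_context. */
static int printk(const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk_func(fmt, args);
	va_end(args);

	return r;
}

int main(void)
{
	printk("hello from %s\n", "printk");
	return 0;
}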
363 | void __init printk_safe_init(void) | ||
364 | { | ||
365 | int cpu; | ||
366 | |||
367 | for_each_possible_cpu(cpu) { | ||
368 | struct printk_safe_seq_buf *s; | ||
369 | |||
370 | s = &per_cpu(safe_print_seq, cpu); | ||
371 | init_irq_work(&s->work, __printk_safe_flush); | ||
372 | |||
373 | #ifdef CONFIG_PRINTK_NMI | ||
374 | s = &per_cpu(nmi_print_seq, cpu); | ||
375 | init_irq_work(&s->work, __printk_safe_flush); | ||
376 | #endif | ||
377 | } | ||
378 | |||
379 | /* Make sure that IRQ works are initialized before enabling. */ | ||
380 | smp_wmb(); | ||
381 | printk_safe_irq_ready = 1; | ||
382 | |||
383 | /* Flush pending messages that did not have scheduled IRQ works. */ | ||
384 | printk_safe_flush(); | ||
291 | } | 385 | } |
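The ordering in printk_safe_init() is the write side of the barrier pairing noted at queue_flush_work(): every irq_work is initialized first, smp_wmb() makes those writes visible, and only then is printk_safe_irq_ready raised. The closing printk_safe_flush() picks up any messages stored before the flag went up, which could not queue their own IRQ work.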
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 75554754eadf..5f7999eacad5 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c | |||
@@ -77,7 +77,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, | |||
77 | * Force flush any remote buffers that might be stuck in IRQ context | 77 | * Force flush any remote buffers that might be stuck in IRQ context |
78 | * and therefore could not run their irq_work. | 78 | * and therefore could not run their irq_work. |
79 | */ | 79 | */ |
80 | printk_nmi_flush(); | 80 | printk_safe_flush(); |
81 | 81 | ||
82 | clear_bit_unlock(0, &backtrace_flag); | 82 | clear_bit_unlock(0, &backtrace_flag); |
83 | put_cpu(); | 83 | put_cpu(); |