aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fs-writeback.c35
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/iomap.c17
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/overlayfs/util.c1
-rw-r--r--fs/timerfd.c8
-rw-r--r--fs/userfaultfd.c75
-rw-r--r--fs/xfs/kmem.c18
-rw-r--r--fs/xfs/kmem.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c34
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c6
-rw-r--r--fs/xfs/xfs_aops.c59
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_iomap.c25
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--fs/xfs/xfs_reflink.c23
-rw-r--r--fs/xfs/xfs_reflink.h4
-rw-r--r--fs/xfs/xfs_super.c2
22 files changed, 169 insertions, 172 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 7d398d300e97..9382db998ec9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
743 newsock->type = con->sock->type; 743 newsock->type = con->sock->type;
744 newsock->ops = con->sock->ops; 744 newsock->ops = con->sock->ops;
745 745
746 result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); 746 result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
747 if (result < 0) 747 if (result < 0)
748 goto accept_err; 748 goto accept_err;
749 749
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 338d2f73eb29..a2c05f2ada6d 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1359,6 +1359,16 @@ out:
1359 return 0; 1359 return 0;
1360} 1360}
1361 1361
1362static void fat_dummy_inode_init(struct inode *inode)
1363{
1364 /* Initialize this dummy inode to work as no-op. */
1365 MSDOS_I(inode)->mmu_private = 0;
1366 MSDOS_I(inode)->i_start = 0;
1367 MSDOS_I(inode)->i_logstart = 0;
1368 MSDOS_I(inode)->i_attrs = 0;
1369 MSDOS_I(inode)->i_pos = 0;
1370}
1371
1362static int fat_read_root(struct inode *inode) 1372static int fat_read_root(struct inode *inode)
1363{ 1373{
1364 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 1374 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1803 fat_inode = new_inode(sb); 1813 fat_inode = new_inode(sb);
1804 if (!fat_inode) 1814 if (!fat_inode)
1805 goto out_fail; 1815 goto out_fail;
1806 MSDOS_I(fat_inode)->i_pos = 0; 1816 fat_dummy_inode_init(fat_inode);
1807 sbi->fat_inode = fat_inode; 1817 sbi->fat_inode = fat_inode;
1808 1818
1809 fsinfo_inode = new_inode(sb); 1819 fsinfo_inode = new_inode(sb);
1810 if (!fsinfo_inode) 1820 if (!fsinfo_inode)
1811 goto out_fail; 1821 goto out_fail;
1822 fat_dummy_inode_init(fsinfo_inode);
1812 fsinfo_inode->i_ino = MSDOS_FSINFO_INO; 1823 fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
1813 sbi->fsinfo_inode = fsinfo_inode; 1824 sbi->fsinfo_inode = fsinfo_inode;
1814 insert_inode_hash(fsinfo_inode); 1825 insert_inode_hash(fsinfo_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef600591d96f..63ee2940775c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
173 spin_unlock_bh(&wb->work_lock); 173 spin_unlock_bh(&wb->work_lock);
174} 174}
175 175
176static void finish_writeback_work(struct bdi_writeback *wb,
177 struct wb_writeback_work *work)
178{
179 struct wb_completion *done = work->done;
180
181 if (work->auto_free)
182 kfree(work);
183 if (done && atomic_dec_and_test(&done->cnt))
184 wake_up_all(&wb->bdi->wb_waitq);
185}
186
176static void wb_queue_work(struct bdi_writeback *wb, 187static void wb_queue_work(struct bdi_writeback *wb,
177 struct wb_writeback_work *work) 188 struct wb_writeback_work *work)
178{ 189{
179 trace_writeback_queue(wb, work); 190 trace_writeback_queue(wb, work);
180 191
181 spin_lock_bh(&wb->work_lock);
182 if (!test_bit(WB_registered, &wb->state))
183 goto out_unlock;
184 if (work->done) 192 if (work->done)
185 atomic_inc(&work->done->cnt); 193 atomic_inc(&work->done->cnt);
186 list_add_tail(&work->list, &wb->work_list); 194
187 mod_delayed_work(bdi_wq, &wb->dwork, 0); 195 spin_lock_bh(&wb->work_lock);
188out_unlock: 196
197 if (test_bit(WB_registered, &wb->state)) {
198 list_add_tail(&work->list, &wb->work_list);
199 mod_delayed_work(bdi_wq, &wb->dwork, 0);
200 } else
201 finish_writeback_work(wb, work);
202
189 spin_unlock_bh(&wb->work_lock); 203 spin_unlock_bh(&wb->work_lock);
190} 204}
191 205
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
1873 1887
1874 set_bit(WB_writeback_running, &wb->state); 1888 set_bit(WB_writeback_running, &wb->state);
1875 while ((work = get_next_work_item(wb)) != NULL) { 1889 while ((work = get_next_work_item(wb)) != NULL) {
1876 struct wb_completion *done = work->done;
1877
1878 trace_writeback_exec(wb, work); 1890 trace_writeback_exec(wb, work);
1879
1880 wrote += wb_writeback(wb, work); 1891 wrote += wb_writeback(wb, work);
1881 1892 finish_writeback_work(wb, work);
1882 if (work->auto_free)
1883 kfree(work);
1884 if (done && atomic_dec_and_test(&done->cnt))
1885 wake_up_all(&wb->bdi->wb_waitq);
1886 } 1893 }
1887 1894
1888 /* 1895 /*
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c45084ac642d..511e1ed7e2de 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
207 struct gfs2_sbd *ln_sbd; 207 struct gfs2_sbd *ln_sbd;
208 u64 ln_number; 208 u64 ln_number;
209 unsigned int ln_type; 209 unsigned int ln_type;
210}; 210} __packed __aligned(sizeof(int));
211 211
212#define lm_name_equal(name1, name2) \ 212#define lm_name_equal(name1, name2) \
213 (((name1)->ln_number == (name2)->ln_number) && \ 213 (((name1)->ln_number == (name2)->ln_number) && \
diff --git a/fs/iomap.c b/fs/iomap.c
index 3ca1a8e44135..141c3cd55a8b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
846 struct address_space *mapping = iocb->ki_filp->f_mapping; 846 struct address_space *mapping = iocb->ki_filp->f_mapping;
847 struct inode *inode = file_inode(iocb->ki_filp); 847 struct inode *inode = file_inode(iocb->ki_filp);
848 size_t count = iov_iter_count(iter); 848 size_t count = iov_iter_count(iter);
849 loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; 849 loff_t pos = iocb->ki_pos, start = pos;
850 loff_t end = iocb->ki_pos + count - 1, ret = 0;
850 unsigned int flags = IOMAP_DIRECT; 851 unsigned int flags = IOMAP_DIRECT;
851 struct blk_plug plug; 852 struct blk_plug plug;
852 struct iomap_dio *dio; 853 struct iomap_dio *dio;
@@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
887 } 888 }
888 889
889 if (mapping->nrpages) { 890 if (mapping->nrpages) {
890 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); 891 ret = filemap_write_and_wait_range(mapping, start, end);
891 if (ret) 892 if (ret)
892 goto out_free_dio; 893 goto out_free_dio;
893 894
894 ret = invalidate_inode_pages2_range(mapping, 895 ret = invalidate_inode_pages2_range(mapping,
895 iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); 896 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
896 WARN_ON_ONCE(ret); 897 WARN_ON_ONCE(ret);
897 ret = 0; 898 ret = 0;
898 } 899 }
@@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
941 __set_current_state(TASK_RUNNING); 942 __set_current_state(TASK_RUNNING);
942 } 943 }
943 944
945 ret = iomap_dio_complete(dio);
946
944 /* 947 /*
945 * Try again to invalidate clean pages which might have been cached by 948 * Try again to invalidate clean pages which might have been cached by
946 * non-direct readahead, or faulted in by get_user_pages() if the source 949 * non-direct readahead, or faulted in by get_user_pages() if the source
@@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
949 * this invalidation fails, tough, the write still worked... 952 * this invalidation fails, tough, the write still worked...
950 */ 953 */
951 if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { 954 if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
952 ret = invalidate_inode_pages2_range(mapping, 955 int err = invalidate_inode_pages2_range(mapping,
953 iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); 956 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
954 WARN_ON_ONCE(ret); 957 WARN_ON_ONCE(err);
955 } 958 }
956 959
957 return iomap_dio_complete(dio); 960 return ret;
958 961
959out_free_dio: 962out_free_dio:
960 kfree(dio); 963 kfree(dio);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4348027384f5..d0ab7e56d0b4 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
1863 1863
1864 new_sock->type = sock->type; 1864 new_sock->type = sock->type;
1865 new_sock->ops = sock->ops; 1865 new_sock->ops = sock->ops;
1866 ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); 1866 ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
1867 if (ret < 0) 1867 if (ret < 0)
1868 goto out; 1868 goto out;
1869 1869
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 1953986ee6bc..6e610a205e15 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -12,7 +12,6 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/cred.h> 13#include <linux/cred.h>
14#include <linux/xattr.h> 14#include <linux/xattr.h>
15#include <linux/sched/signal.h>
16#include "overlayfs.h" 15#include "overlayfs.h"
17#include "ovl_entry.h" 16#include "ovl_entry.h"
18 17
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 384fa759a563..c543cdb5f8ed 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
400 clockid != CLOCK_BOOTTIME_ALARM)) 400 clockid != CLOCK_BOOTTIME_ALARM))
401 return -EINVAL; 401 return -EINVAL;
402 402
403 if (!capable(CAP_WAKE_ALARM) && 403 if ((clockid == CLOCK_REALTIME_ALARM ||
404 (clockid == CLOCK_REALTIME_ALARM || 404 clockid == CLOCK_BOOTTIME_ALARM) &&
405 clockid == CLOCK_BOOTTIME_ALARM)) 405 !capable(CAP_WAKE_ALARM))
406 return -EPERM; 406 return -EPERM;
407 407
408 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 408 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags,
449 return ret; 449 return ret;
450 ctx = f.file->private_data; 450 ctx = f.file->private_data;
451 451
452 if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { 452 if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
453 fdput(f); 453 fdput(f);
454 return -EPERM; 454 return -EPERM;
455 } 455 }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 973607df579d..1d227b0fcf49 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -138,8 +138,6 @@ out:
138 * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd 138 * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
139 * context. 139 * context.
140 * @ctx: [in] Pointer to the userfaultfd context. 140 * @ctx: [in] Pointer to the userfaultfd context.
141 *
142 * Returns: In case of success, returns not zero.
143 */ 141 */
144static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) 142static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
145{ 143{
@@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
267{ 265{
268 struct mm_struct *mm = ctx->mm; 266 struct mm_struct *mm = ctx->mm;
269 pgd_t *pgd; 267 pgd_t *pgd;
268 p4d_t *p4d;
270 pud_t *pud; 269 pud_t *pud;
271 pmd_t *pmd, _pmd; 270 pmd_t *pmd, _pmd;
272 pte_t *pte; 271 pte_t *pte;
@@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
277 pgd = pgd_offset(mm, address); 276 pgd = pgd_offset(mm, address);
278 if (!pgd_present(*pgd)) 277 if (!pgd_present(*pgd))
279 goto out; 278 goto out;
280 pud = pud_offset(pgd, address); 279 p4d = p4d_offset(pgd, address);
280 if (!p4d_present(*p4d))
281 goto out;
282 pud = pud_offset(p4d, address);
281 if (!pud_present(*pud)) 283 if (!pud_present(*pud))
282 goto out; 284 goto out;
283 pmd = pmd_offset(pud, address); 285 pmd = pmd_offset(pud, address);
@@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
490 * in such case. 492 * in such case.
491 */ 493 */
492 down_read(&mm->mmap_sem); 494 down_read(&mm->mmap_sem);
493 ret = 0; 495 ret = VM_FAULT_NOPAGE;
494 } 496 }
495 } 497 }
496 498
@@ -527,10 +529,11 @@ out:
527 return ret; 529 return ret;
528} 530}
529 531
530static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, 532static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
531 struct userfaultfd_wait_queue *ewq) 533 struct userfaultfd_wait_queue *ewq)
532{ 534{
533 int ret = 0; 535 if (WARN_ON_ONCE(current->flags & PF_EXITING))
536 goto out;
534 537
535 ewq->ctx = ctx; 538 ewq->ctx = ctx;
536 init_waitqueue_entry(&ewq->wq, current); 539 init_waitqueue_entry(&ewq->wq, current);
@@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
547 break; 550 break;
548 if (ACCESS_ONCE(ctx->released) || 551 if (ACCESS_ONCE(ctx->released) ||
549 fatal_signal_pending(current)) { 552 fatal_signal_pending(current)) {
550 ret = -1;
551 __remove_wait_queue(&ctx->event_wqh, &ewq->wq); 553 __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
554 if (ewq->msg.event == UFFD_EVENT_FORK) {
555 struct userfaultfd_ctx *new;
556
557 new = (struct userfaultfd_ctx *)
558 (unsigned long)
559 ewq->msg.arg.reserved.reserved1;
560
561 userfaultfd_ctx_put(new);
562 }
552 break; 563 break;
553 } 564 }
554 565
@@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
566 * ctx may go away after this if the userfault pseudo fd is 577 * ctx may go away after this if the userfault pseudo fd is
567 * already released. 578 * already released.
568 */ 579 */
569 580out:
570 userfaultfd_ctx_put(ctx); 581 userfaultfd_ctx_put(ctx);
571 return ret;
572} 582}
573 583
574static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, 584static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
@@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
626 return 0; 636 return 0;
627} 637}
628 638
629static int dup_fctx(struct userfaultfd_fork_ctx *fctx) 639static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
630{ 640{
631 struct userfaultfd_ctx *ctx = fctx->orig; 641 struct userfaultfd_ctx *ctx = fctx->orig;
632 struct userfaultfd_wait_queue ewq; 642 struct userfaultfd_wait_queue ewq;
@@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
636 ewq.msg.event = UFFD_EVENT_FORK; 646 ewq.msg.event = UFFD_EVENT_FORK;
637 ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; 647 ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
638 648
639 return userfaultfd_event_wait_completion(ctx, &ewq); 649 userfaultfd_event_wait_completion(ctx, &ewq);
640} 650}
641 651
642void dup_userfaultfd_complete(struct list_head *fcs) 652void dup_userfaultfd_complete(struct list_head *fcs)
643{ 653{
644 int ret = 0;
645 struct userfaultfd_fork_ctx *fctx, *n; 654 struct userfaultfd_fork_ctx *fctx, *n;
646 655
647 list_for_each_entry_safe(fctx, n, fcs, list) { 656 list_for_each_entry_safe(fctx, n, fcs, list) {
648 if (!ret) 657 dup_fctx(fctx);
649 ret = dup_fctx(fctx);
650 list_del(&fctx->list); 658 list_del(&fctx->list);
651 kfree(fctx); 659 kfree(fctx);
652 } 660 }
@@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
689 userfaultfd_event_wait_completion(ctx, &ewq); 697 userfaultfd_event_wait_completion(ctx, &ewq);
690} 698}
691 699
692void userfaultfd_remove(struct vm_area_struct *vma, 700bool userfaultfd_remove(struct vm_area_struct *vma,
693 struct vm_area_struct **prev,
694 unsigned long start, unsigned long end) 701 unsigned long start, unsigned long end)
695{ 702{
696 struct mm_struct *mm = vma->vm_mm; 703 struct mm_struct *mm = vma->vm_mm;
@@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma,
699 706
700 ctx = vma->vm_userfaultfd_ctx.ctx; 707 ctx = vma->vm_userfaultfd_ctx.ctx;
701 if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) 708 if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
702 return; 709 return true;
703 710
704 userfaultfd_ctx_get(ctx); 711 userfaultfd_ctx_get(ctx);
705 up_read(&mm->mmap_sem); 712 up_read(&mm->mmap_sem);
706 713
707 *prev = NULL; /* We wait for ACK w/o the mmap semaphore */
708
709 msg_init(&ewq.msg); 714 msg_init(&ewq.msg);
710 715
711 ewq.msg.event = UFFD_EVENT_REMOVE; 716 ewq.msg.event = UFFD_EVENT_REMOVE;
@@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma,
714 719
715 userfaultfd_event_wait_completion(ctx, &ewq); 720 userfaultfd_event_wait_completion(ctx, &ewq);
716 721
717 down_read(&mm->mmap_sem); 722 return false;
718} 723}
719 724
720static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, 725static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
@@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
775 } 780 }
776} 781}
777 782
778void userfaultfd_exit(struct mm_struct *mm)
779{
780 struct vm_area_struct *vma = mm->mmap;
781
782 /*
783 * We can do the vma walk without locking because the caller
784 * (exit_mm) knows it now has exclusive access
785 */
786 while (vma) {
787 struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
788
789 if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
790 struct userfaultfd_wait_queue ewq;
791
792 userfaultfd_ctx_get(ctx);
793
794 msg_init(&ewq.msg);
795 ewq.msg.event = UFFD_EVENT_EXIT;
796
797 userfaultfd_event_wait_completion(ctx, &ewq);
798
799 ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
800 }
801
802 vma = vma->vm_next;
803 }
804}
805
806static int userfaultfd_release(struct inode *inode, struct file *file) 783static int userfaultfd_release(struct inode *inode, struct file *file)
807{ 784{
808 struct userfaultfd_ctx *ctx = file->private_data; 785 struct userfaultfd_ctx *ctx = file->private_data;
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 2dfdc62f795e..70a5b55e0870 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -25,24 +25,6 @@
25#include "kmem.h" 25#include "kmem.h"
26#include "xfs_message.h" 26#include "xfs_message.h"
27 27
28/*
29 * Greedy allocation. May fail and may return vmalloced memory.
30 */
31void *
32kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
33{
34 void *ptr;
35 size_t kmsize = maxsize;
36
37 while (!(ptr = vzalloc(kmsize))) {
38 if ((kmsize >>= 1) <= minsize)
39 kmsize = minsize;
40 }
41 if (ptr)
42 *size = kmsize;
43 return ptr;
44}
45
46void * 28void *
47kmem_alloc(size_t size, xfs_km_flags_t flags) 29kmem_alloc(size_t size, xfs_km_flags_t flags)
48{ 30{
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 689f746224e7..f0fc84fcaac2 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr)
69} 69}
70 70
71 71
72extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
73
74static inline void * 72static inline void *
75kmem_zalloc(size_t size, xfs_km_flags_t flags) 73kmem_zalloc(size_t size, xfs_km_flags_t flags)
76{ 74{
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9c66d47757a..9bd104f32908 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(
763 args.type = XFS_ALLOCTYPE_START_BNO; 763 args.type = XFS_ALLOCTYPE_START_BNO;
764 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 764 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
765 } else if (dfops->dop_low) { 765 } else if (dfops->dop_low) {
766try_another_ag:
767 args.type = XFS_ALLOCTYPE_START_BNO; 766 args.type = XFS_ALLOCTYPE_START_BNO;
767try_another_ag:
768 args.fsbno = *firstblock; 768 args.fsbno = *firstblock;
769 } else { 769 } else {
770 args.type = XFS_ALLOCTYPE_NEAR_BNO; 770 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -790,13 +790,17 @@ try_another_ag:
790 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && 790 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
791 args.fsbno == NULLFSBLOCK && 791 args.fsbno == NULLFSBLOCK &&
792 args.type == XFS_ALLOCTYPE_NEAR_BNO) { 792 args.type == XFS_ALLOCTYPE_NEAR_BNO) {
793 dfops->dop_low = true; 793 args.type = XFS_ALLOCTYPE_FIRST_AG;
794 goto try_another_ag; 794 goto try_another_ag;
795 } 795 }
796 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
797 xfs_iroot_realloc(ip, -1, whichfork);
798 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
799 return -ENOSPC;
800 }
796 /* 801 /*
797 * Allocation can't fail, the space was reserved. 802 * Allocation can't fail, the space was reserved.
798 */ 803 */
799 ASSERT(args.fsbno != NULLFSBLOCK);
800 ASSERT(*firstblock == NULLFSBLOCK || 804 ASSERT(*firstblock == NULLFSBLOCK ||
801 args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); 805 args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
802 *firstblock = cur->bc_private.b.firstblock = args.fsbno; 806 *firstblock = cur->bc_private.b.firstblock = args.fsbno;
@@ -4150,6 +4154,19 @@ xfs_bmapi_read(
4150 return 0; 4154 return 0;
4151} 4155}
4152 4156
4157/*
4158 * Add a delayed allocation extent to an inode. Blocks are reserved from the
4159 * global pool and the extent inserted into the inode in-core extent tree.
4160 *
4161 * On entry, got refers to the first extent beyond the offset of the extent to
4162 * allocate or eof is specified if no such extent exists. On return, got refers
4163 * to the extent record that was inserted to the inode fork.
4164 *
4165 * Note that the allocated extent may have been merged with contiguous extents
4166 * during insertion into the inode fork. Thus, got does not reflect the current
4167 * state of the inode fork on return. If necessary, the caller can use lastx to
4168 * look up the updated record in the inode fork.
4169 */
4153int 4170int
4154xfs_bmapi_reserve_delalloc( 4171xfs_bmapi_reserve_delalloc(
4155 struct xfs_inode *ip, 4172 struct xfs_inode *ip,
@@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc(
4236 got->br_startblock = nullstartblock(indlen); 4253 got->br_startblock = nullstartblock(indlen);
4237 got->br_blockcount = alen; 4254 got->br_blockcount = alen;
4238 got->br_state = XFS_EXT_NORM; 4255 got->br_state = XFS_EXT_NORM;
4239 xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
4240 4256
4241 /* 4257 xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
4242 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
4243 * might have merged it into one of the neighbouring ones.
4244 */
4245 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
4246 4258
4247 /* 4259 /*
4248 * Tag the inode if blocks were preallocated. Note that COW fork 4260 * Tag the inode if blocks were preallocated. Note that COW fork
@@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc(
4254 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) 4266 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4255 xfs_inode_set_cowblocks_tag(ip); 4267 xfs_inode_set_cowblocks_tag(ip);
4256 4268
4257 ASSERT(got->br_startoff <= aoff);
4258 ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
4259 ASSERT(isnullstartblock(got->br_startblock));
4260 ASSERT(got->br_state == XFS_EXT_NORM);
4261 return 0; 4269 return 0;
4262 4270
4263out_unreserve_blocks: 4271out_unreserve_blocks:
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f93072b58a58..fd55db479385 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(
447 447
448 if (args.fsbno == NULLFSBLOCK) { 448 if (args.fsbno == NULLFSBLOCK) {
449 args.fsbno = be64_to_cpu(start->l); 449 args.fsbno = be64_to_cpu(start->l);
450try_another_ag:
451 args.type = XFS_ALLOCTYPE_START_BNO; 450 args.type = XFS_ALLOCTYPE_START_BNO;
451try_another_ag:
452 /* 452 /*
453 * Make sure there is sufficient room left in the AG to 453 * Make sure there is sufficient room left in the AG to
454 * complete a full tree split for an extent insert. If 454 * complete a full tree split for an extent insert. If
@@ -488,8 +488,8 @@ try_another_ag:
488 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && 488 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
489 args.fsbno == NULLFSBLOCK && 489 args.fsbno == NULLFSBLOCK &&
490 args.type == XFS_ALLOCTYPE_NEAR_BNO) { 490 args.type == XFS_ALLOCTYPE_NEAR_BNO) {
491 cur->bc_private.b.dfops->dop_low = true;
492 args.fsbno = cur->bc_private.b.firstblock; 491 args.fsbno = cur->bc_private.b.firstblock;
492 args.type = XFS_ALLOCTYPE_FIRST_AG;
493 goto try_another_ag; 493 goto try_another_ag;
494 } 494 }
495 495
@@ -506,7 +506,7 @@ try_another_ag:
506 goto error0; 506 goto error0;
507 cur->bc_private.b.dfops->dop_low = true; 507 cur->bc_private.b.dfops->dop_low = true;
508 } 508 }
509 if (args.fsbno == NULLFSBLOCK) { 509 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
510 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 510 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
511 *stat = 0; 511 *stat = 0;
512 return 0; 512 return 0;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index bf65a9ea8642..61494295d92f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -274,54 +274,49 @@ xfs_end_io(
274 struct xfs_ioend *ioend = 274 struct xfs_ioend *ioend =
275 container_of(work, struct xfs_ioend, io_work); 275 container_of(work, struct xfs_ioend, io_work);
276 struct xfs_inode *ip = XFS_I(ioend->io_inode); 276 struct xfs_inode *ip = XFS_I(ioend->io_inode);
277 xfs_off_t offset = ioend->io_offset;
278 size_t size = ioend->io_size;
277 int error = ioend->io_bio->bi_error; 279 int error = ioend->io_bio->bi_error;
278 280
279 /* 281 /*
280 * Set an error if the mount has shut down and proceed with end I/O 282 * Just clean up the in-memory strutures if the fs has been shut down.
281 * processing so it can perform whatever cleanups are necessary.
282 */ 283 */
283 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 284 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
284 error = -EIO; 285 error = -EIO;
286 goto done;
287 }
285 288
286 /* 289 /*
287 * For a CoW extent, we need to move the mapping from the CoW fork 290 * Clean up any COW blocks on an I/O error.
288 * to the data fork. If instead an error happened, just dump the
289 * new blocks.
290 */ 291 */
291 if (ioend->io_type == XFS_IO_COW) { 292 if (unlikely(error)) {
292 if (error) 293 switch (ioend->io_type) {
293 goto done; 294 case XFS_IO_COW:
294 if (ioend->io_bio->bi_error) { 295 xfs_reflink_cancel_cow_range(ip, offset, size, true);
295 error = xfs_reflink_cancel_cow_range(ip, 296 break;
296 ioend->io_offset, ioend->io_size);
297 goto done;
298 } 297 }
299 error = xfs_reflink_end_cow(ip, ioend->io_offset, 298
300 ioend->io_size); 299 goto done;
301 if (error)
302 goto done;
303 } 300 }
304 301
305 /* 302 /*
306 * For unwritten extents we need to issue transactions to convert a 303 * Success: commit the COW or unwritten blocks if needed.
307 * range to normal written extens after the data I/O has finished.
308 * Detecting and handling completion IO errors is done individually
309 * for each case as different cleanup operations need to be performed
310 * on error.
311 */ 304 */
312 if (ioend->io_type == XFS_IO_UNWRITTEN) { 305 switch (ioend->io_type) {
313 if (error) 306 case XFS_IO_COW:
314 goto done; 307 error = xfs_reflink_end_cow(ip, offset, size);
315 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 308 break;
316 ioend->io_size); 309 case XFS_IO_UNWRITTEN:
317 } else if (ioend->io_append_trans) { 310 error = xfs_iomap_write_unwritten(ip, offset, size);
318 error = xfs_setfilesize_ioend(ioend, error); 311 break;
319 } else { 312 default:
320 ASSERT(!xfs_ioend_is_append(ioend) || 313 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
321 ioend->io_type == XFS_IO_COW); 314 break;
322 } 315 }
323 316
324done: 317done:
318 if (ioend->io_append_trans)
319 error = xfs_setfilesize_ioend(ioend, error);
325 xfs_destroy_ioend(ioend, error); 320 xfs_destroy_ioend(ioend, error);
326} 321}
327 322
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 7234b9748c36..3531f8f72fa5 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(
1608 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1608 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1609 xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 1609 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
1610 1610
1611 ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); 1611 ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
1612 1612
1613 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); 1613 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1614 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1614 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index edfa6a55b064..7eaf1ef74e3c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1615,7 +1615,7 @@ xfs_itruncate_extents(
1615 1615
1616 /* Remove all pending CoW reservations. */ 1616 /* Remove all pending CoW reservations. */
1617 error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, 1617 error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
1618 last_block); 1618 last_block, true);
1619 if (error) 1619 if (error)
1620 goto out; 1620 goto out;
1621 1621
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 41662fb14e87..288ee5b840d7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -630,6 +630,11 @@ retry:
630 goto out_unlock; 630 goto out_unlock;
631 } 631 }
632 632
633 /*
634 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
635 * them out if the write happens to fail.
636 */
637 iomap->flags = IOMAP_F_NEW;
633 trace_xfs_iomap_alloc(ip, offset, count, 0, &got); 638 trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
634done: 639done:
635 if (isnullstartblock(got.br_startblock)) 640 if (isnullstartblock(got.br_startblock))
@@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(
1071 struct xfs_inode *ip, 1076 struct xfs_inode *ip,
1072 loff_t offset, 1077 loff_t offset,
1073 loff_t length, 1078 loff_t length,
1074 ssize_t written) 1079 ssize_t written,
1080 struct iomap *iomap)
1075{ 1081{
1076 struct xfs_mount *mp = ip->i_mount; 1082 struct xfs_mount *mp = ip->i_mount;
1077 xfs_fileoff_t start_fsb; 1083 xfs_fileoff_t start_fsb;
1078 xfs_fileoff_t end_fsb; 1084 xfs_fileoff_t end_fsb;
1079 int error = 0; 1085 int error = 0;
1080 1086
1081 /* behave as if the write failed if drop writes is enabled */ 1087 /*
1082 if (xfs_mp_drop_writes(mp)) 1088 * Behave as if the write failed if drop writes is enabled. Set the NEW
1089 * flag to force delalloc cleanup.
1090 */
1091 if (xfs_mp_drop_writes(mp)) {
1092 iomap->flags |= IOMAP_F_NEW;
1083 written = 0; 1093 written = 0;
1094 }
1084 1095
1085 /* 1096 /*
1086 * start_fsb refers to the first unused block after a short write. If 1097 * start_fsb refers to the first unused block after a short write. If
@@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(
1094 end_fsb = XFS_B_TO_FSB(mp, offset + length); 1105 end_fsb = XFS_B_TO_FSB(mp, offset + length);
1095 1106
1096 /* 1107 /*
1097 * Trim back delalloc blocks if we didn't manage to write the whole 1108 * Trim delalloc blocks if they were allocated by this write and we
1098 * range reserved. 1109 * didn't manage to write the whole range.
1099 * 1110 *
1100 * We don't need to care about racing delalloc as we hold i_mutex 1111 * We don't need to care about racing delalloc as we hold i_mutex
1101 * across the reserve/allocate/unreserve calls. If there are delalloc 1112 * across the reserve/allocate/unreserve calls. If there are delalloc
1102 * blocks in the range, they are ours. 1113 * blocks in the range, they are ours.
1103 */ 1114 */
1104 if (start_fsb < end_fsb) { 1115 if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
1105 truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), 1116 truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
1106 XFS_FSB_TO_B(mp, end_fsb) - 1); 1117 XFS_FSB_TO_B(mp, end_fsb) - 1);
1107 1118
@@ -1131,7 +1142,7 @@ xfs_file_iomap_end(
1131{ 1142{
1132 if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) 1143 if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
1133 return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, 1144 return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
1134 length, written); 1145 length, written, iomap);
1135 return 0; 1146 return 0;
1136} 1147}
1137 1148
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e881790c17..2a6d9b1558e0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -361,7 +361,6 @@ xfs_bulkstat(
361 xfs_agino_t agino; /* inode # in allocation group */ 361 xfs_agino_t agino; /* inode # in allocation group */
362 xfs_agnumber_t agno; /* allocation group number */ 362 xfs_agnumber_t agno; /* allocation group number */
363 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 363 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
364 size_t irbsize; /* size of irec buffer in bytes */
365 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ 364 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
366 int nirbuf; /* size of irbuf */ 365 int nirbuf; /* size of irbuf */
367 int ubcount; /* size of user's buffer */ 366 int ubcount; /* size of user's buffer */
@@ -388,11 +387,10 @@ xfs_bulkstat(
388 *ubcountp = 0; 387 *ubcountp = 0;
389 *done = 0; 388 *done = 0;
390 389
391 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); 390 irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
392 if (!irbuf) 391 if (!irbuf)
393 return -ENOMEM; 392 return -ENOMEM;
394 393 nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
395 nirbuf = irbsize / sizeof(*irbuf);
396 394
397 /* 395 /*
398 * Loop over the allocation groups, starting from the last 396 * Loop over the allocation groups, starting from the last
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 450bde68bb75..688ebff1f663 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -513,8 +513,7 @@ STATIC void
513xfs_set_inoalignment(xfs_mount_t *mp) 513xfs_set_inoalignment(xfs_mount_t *mp)
514{ 514{
515 if (xfs_sb_version_hasalign(&mp->m_sb) && 515 if (xfs_sb_version_hasalign(&mp->m_sb) &&
516 mp->m_sb.sb_inoalignmt >= 516 mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
517 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
518 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 517 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
519 else 518 else
520 mp->m_inoalign_mask = 0; 519 mp->m_inoalign_mask = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index da6d08fb359c..4a84c5ea266d 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow(
548} 548}
549 549
550/* 550/*
551 * Cancel all pending CoW reservations for some block range of an inode. 551 * Cancel CoW reservations for some block range of an inode.
552 *
553 * If cancel_real is true this function cancels all COW fork extents for the
554 * inode; if cancel_real is false, real extents are not cleared.
552 */ 555 */
553int 556int
554xfs_reflink_cancel_cow_blocks( 557xfs_reflink_cancel_cow_blocks(
555 struct xfs_inode *ip, 558 struct xfs_inode *ip,
556 struct xfs_trans **tpp, 559 struct xfs_trans **tpp,
557 xfs_fileoff_t offset_fsb, 560 xfs_fileoff_t offset_fsb,
558 xfs_fileoff_t end_fsb) 561 xfs_fileoff_t end_fsb,
562 bool cancel_real)
559{ 563{
560 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 564 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
561 struct xfs_bmbt_irec got, del; 565 struct xfs_bmbt_irec got, del;
@@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks(
579 &idx, &got, &del); 583 &idx, &got, &del);
580 if (error) 584 if (error)
581 break; 585 break;
582 } else { 586 } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
583 xfs_trans_ijoin(*tpp, ip, 0); 587 xfs_trans_ijoin(*tpp, ip, 0);
584 xfs_defer_init(&dfops, &firstfsb); 588 xfs_defer_init(&dfops, &firstfsb);
585 589
@@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks(
621} 625}
622 626
623/* 627/*
624 * Cancel all pending CoW reservations for some byte range of an inode. 628 * Cancel CoW reservations for some byte range of an inode.
629 *
630 * If cancel_real is true this function cancels all COW fork extents for the
631 * inode; if cancel_real is false, real extents are not cleared.
625 */ 632 */
626int 633int
627xfs_reflink_cancel_cow_range( 634xfs_reflink_cancel_cow_range(
628 struct xfs_inode *ip, 635 struct xfs_inode *ip,
629 xfs_off_t offset, 636 xfs_off_t offset,
630 xfs_off_t count) 637 xfs_off_t count,
638 bool cancel_real)
631{ 639{
632 struct xfs_trans *tp; 640 struct xfs_trans *tp;
633 xfs_fileoff_t offset_fsb; 641 xfs_fileoff_t offset_fsb;
@@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range(
653 xfs_trans_ijoin(tp, ip, 0); 661 xfs_trans_ijoin(tp, ip, 0);
654 662
655 /* Scrape out the old CoW reservations */ 663 /* Scrape out the old CoW reservations */
656 error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); 664 error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
665 cancel_real);
657 if (error) 666 if (error)
658 goto out_cancel; 667 goto out_cancel;
659 668
@@ -1450,7 +1459,7 @@ next:
1450 * We didn't find any shared blocks so turn off the reflink flag. 1459 * We didn't find any shared blocks so turn off the reflink flag.
1451 * First, get rid of any leftover CoW mappings. 1460 * First, get rid of any leftover CoW mappings.
1452 */ 1461 */
1453 error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); 1462 error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
1454 if (error) 1463 if (error)
1455 return error; 1464 return error;
1456 1465
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 33ac9b8db683..d29a7967f029 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
39 39
40extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, 40extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
41 struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, 41 struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
42 xfs_fileoff_t end_fsb); 42 xfs_fileoff_t end_fsb, bool cancel_real);
43extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, 43extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
44 xfs_off_t count); 44 xfs_off_t count, bool cancel_real);
45extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, 45extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
46 xfs_off_t count); 46 xfs_off_t count);
47extern int xfs_reflink_recover_cow(struct xfs_mount *mp); 47extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 890862f2447c..685c042a120f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
953 XFS_STATS_INC(ip->i_mount, vn_remove); 953 XFS_STATS_INC(ip->i_mount, vn_remove);
954 954
955 if (xfs_is_reflink_inode(ip)) { 955 if (xfs_is_reflink_inode(ip)) {
956 error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); 956 error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
957 if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) 957 if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
958 xfs_warn(ip->i_mount, 958 xfs_warn(ip->i_mount,
959"Error %d while evicting CoW blocks for inode %llu.", 959"Error %d while evicting CoW blocks for inode %llu.",