diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/dlm/lowcomms.c | 2 | ||||
| -rw-r--r-- | fs/fat/inode.c | 13 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 35 | ||||
| -rw-r--r-- | fs/gfs2/incore.h | 2 | ||||
| -rw-r--r-- | fs/iomap.c | 17 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/tcp.c | 2 | ||||
| -rw-r--r-- | fs/overlayfs/util.c | 1 | ||||
| -rw-r--r-- | fs/timerfd.c | 8 | ||||
| -rw-r--r-- | fs/userfaultfd.c | 75 | ||||
| -rw-r--r-- | fs/xfs/kmem.c | 18 | ||||
| -rw-r--r-- | fs/xfs/kmem.h | 2 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 34 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.c | 6 | ||||
| -rw-r--r-- | fs/xfs/xfs_aops.c | 59 | ||||
| -rw-r--r-- | fs/xfs/xfs_icache.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_iomap.c | 25 | ||||
| -rw-r--r-- | fs/xfs/xfs_itable.c | 6 | ||||
| -rw-r--r-- | fs/xfs/xfs_mount.c | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_reflink.c | 23 | ||||
| -rw-r--r-- | fs/xfs/xfs_reflink.h | 4 | ||||
| -rw-r--r-- | fs/xfs/xfs_super.c | 2 |
22 files changed, 169 insertions, 172 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7d398d300e97..9382db998ec9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
| @@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con) | |||
| 743 | newsock->type = con->sock->type; | 743 | newsock->type = con->sock->type; |
| 744 | newsock->ops = con->sock->ops; | 744 | newsock->ops = con->sock->ops; |
| 745 | 745 | ||
| 746 | result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); | 746 | result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true); |
| 747 | if (result < 0) | 747 | if (result < 0) |
| 748 | goto accept_err; | 748 | goto accept_err; |
| 749 | 749 | ||
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 338d2f73eb29..a2c05f2ada6d 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -1359,6 +1359,16 @@ out: | |||
| 1359 | return 0; | 1359 | return 0; |
| 1360 | } | 1360 | } |
| 1361 | 1361 | ||
| 1362 | static void fat_dummy_inode_init(struct inode *inode) | ||
| 1363 | { | ||
| 1364 | /* Initialize this dummy inode to work as no-op. */ | ||
| 1365 | MSDOS_I(inode)->mmu_private = 0; | ||
| 1366 | MSDOS_I(inode)->i_start = 0; | ||
| 1367 | MSDOS_I(inode)->i_logstart = 0; | ||
| 1368 | MSDOS_I(inode)->i_attrs = 0; | ||
| 1369 | MSDOS_I(inode)->i_pos = 0; | ||
| 1370 | } | ||
| 1371 | |||
| 1362 | static int fat_read_root(struct inode *inode) | 1372 | static int fat_read_root(struct inode *inode) |
| 1363 | { | 1373 | { |
| 1364 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 1374 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); |
| @@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
| 1803 | fat_inode = new_inode(sb); | 1813 | fat_inode = new_inode(sb); |
| 1804 | if (!fat_inode) | 1814 | if (!fat_inode) |
| 1805 | goto out_fail; | 1815 | goto out_fail; |
| 1806 | MSDOS_I(fat_inode)->i_pos = 0; | 1816 | fat_dummy_inode_init(fat_inode); |
| 1807 | sbi->fat_inode = fat_inode; | 1817 | sbi->fat_inode = fat_inode; |
| 1808 | 1818 | ||
| 1809 | fsinfo_inode = new_inode(sb); | 1819 | fsinfo_inode = new_inode(sb); |
| 1810 | if (!fsinfo_inode) | 1820 | if (!fsinfo_inode) |
| 1811 | goto out_fail; | 1821 | goto out_fail; |
| 1822 | fat_dummy_inode_init(fsinfo_inode); | ||
| 1812 | fsinfo_inode->i_ino = MSDOS_FSINFO_INO; | 1823 | fsinfo_inode->i_ino = MSDOS_FSINFO_INO; |
| 1813 | sbi->fsinfo_inode = fsinfo_inode; | 1824 | sbi->fsinfo_inode = fsinfo_inode; |
| 1814 | insert_inode_hash(fsinfo_inode); | 1825 | insert_inode_hash(fsinfo_inode); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ef600591d96f..63ee2940775c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb) | |||
| 173 | spin_unlock_bh(&wb->work_lock); | 173 | spin_unlock_bh(&wb->work_lock); |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | static void finish_writeback_work(struct bdi_writeback *wb, | ||
| 177 | struct wb_writeback_work *work) | ||
| 178 | { | ||
| 179 | struct wb_completion *done = work->done; | ||
| 180 | |||
| 181 | if (work->auto_free) | ||
| 182 | kfree(work); | ||
| 183 | if (done && atomic_dec_and_test(&done->cnt)) | ||
| 184 | wake_up_all(&wb->bdi->wb_waitq); | ||
| 185 | } | ||
| 186 | |||
| 176 | static void wb_queue_work(struct bdi_writeback *wb, | 187 | static void wb_queue_work(struct bdi_writeback *wb, |
| 177 | struct wb_writeback_work *work) | 188 | struct wb_writeback_work *work) |
| 178 | { | 189 | { |
| 179 | trace_writeback_queue(wb, work); | 190 | trace_writeback_queue(wb, work); |
| 180 | 191 | ||
| 181 | spin_lock_bh(&wb->work_lock); | ||
| 182 | if (!test_bit(WB_registered, &wb->state)) | ||
| 183 | goto out_unlock; | ||
| 184 | if (work->done) | 192 | if (work->done) |
| 185 | atomic_inc(&work->done->cnt); | 193 | atomic_inc(&work->done->cnt); |
| 186 | list_add_tail(&work->list, &wb->work_list); | 194 | |
| 187 | mod_delayed_work(bdi_wq, &wb->dwork, 0); | 195 | spin_lock_bh(&wb->work_lock); |
| 188 | out_unlock: | 196 | |
| 197 | if (test_bit(WB_registered, &wb->state)) { | ||
| 198 | list_add_tail(&work->list, &wb->work_list); | ||
| 199 | mod_delayed_work(bdi_wq, &wb->dwork, 0); | ||
| 200 | } else | ||
| 201 | finish_writeback_work(wb, work); | ||
| 202 | |||
| 189 | spin_unlock_bh(&wb->work_lock); | 203 | spin_unlock_bh(&wb->work_lock); |
| 190 | } | 204 | } |
| 191 | 205 | ||
| @@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb) | |||
| 1873 | 1887 | ||
| 1874 | set_bit(WB_writeback_running, &wb->state); | 1888 | set_bit(WB_writeback_running, &wb->state); |
| 1875 | while ((work = get_next_work_item(wb)) != NULL) { | 1889 | while ((work = get_next_work_item(wb)) != NULL) { |
| 1876 | struct wb_completion *done = work->done; | ||
| 1877 | |||
| 1878 | trace_writeback_exec(wb, work); | 1890 | trace_writeback_exec(wb, work); |
| 1879 | |||
| 1880 | wrote += wb_writeback(wb, work); | 1891 | wrote += wb_writeback(wb, work); |
| 1881 | 1892 | finish_writeback_work(wb, work); | |
| 1882 | if (work->auto_free) | ||
| 1883 | kfree(work); | ||
| 1884 | if (done && atomic_dec_and_test(&done->cnt)) | ||
| 1885 | wake_up_all(&wb->bdi->wb_waitq); | ||
| 1886 | } | 1893 | } |
| 1887 | 1894 | ||
| 1888 | /* | 1895 | /* |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c45084ac642d..511e1ed7e2de 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -207,7 +207,7 @@ struct lm_lockname { | |||
| 207 | struct gfs2_sbd *ln_sbd; | 207 | struct gfs2_sbd *ln_sbd; |
| 208 | u64 ln_number; | 208 | u64 ln_number; |
| 209 | unsigned int ln_type; | 209 | unsigned int ln_type; |
| 210 | }; | 210 | } __packed __aligned(sizeof(int)); |
| 211 | 211 | ||
| 212 | #define lm_name_equal(name1, name2) \ | 212 | #define lm_name_equal(name1, name2) \ |
| 213 | (((name1)->ln_number == (name2)->ln_number) && \ | 213 | (((name1)->ln_number == (name2)->ln_number) && \ |
diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135..141c3cd55a8b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c | |||
| @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
| 846 | struct address_space *mapping = iocb->ki_filp->f_mapping; | 846 | struct address_space *mapping = iocb->ki_filp->f_mapping; |
| 847 | struct inode *inode = file_inode(iocb->ki_filp); | 847 | struct inode *inode = file_inode(iocb->ki_filp); |
| 848 | size_t count = iov_iter_count(iter); | 848 | size_t count = iov_iter_count(iter); |
| 849 | loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; | 849 | loff_t pos = iocb->ki_pos, start = pos; |
| 850 | loff_t end = iocb->ki_pos + count - 1, ret = 0; | ||
| 850 | unsigned int flags = IOMAP_DIRECT; | 851 | unsigned int flags = IOMAP_DIRECT; |
| 851 | struct blk_plug plug; | 852 | struct blk_plug plug; |
| 852 | struct iomap_dio *dio; | 853 | struct iomap_dio *dio; |
| @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
| 887 | } | 888 | } |
| 888 | 889 | ||
| 889 | if (mapping->nrpages) { | 890 | if (mapping->nrpages) { |
| 890 | ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); | 891 | ret = filemap_write_and_wait_range(mapping, start, end); |
| 891 | if (ret) | 892 | if (ret) |
| 892 | goto out_free_dio; | 893 | goto out_free_dio; |
| 893 | 894 | ||
| 894 | ret = invalidate_inode_pages2_range(mapping, | 895 | ret = invalidate_inode_pages2_range(mapping, |
| 895 | iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); | 896 | start >> PAGE_SHIFT, end >> PAGE_SHIFT); |
| 896 | WARN_ON_ONCE(ret); | 897 | WARN_ON_ONCE(ret); |
| 897 | ret = 0; | 898 | ret = 0; |
| 898 | } | 899 | } |
| @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
| 941 | __set_current_state(TASK_RUNNING); | 942 | __set_current_state(TASK_RUNNING); |
| 942 | } | 943 | } |
| 943 | 944 | ||
| 945 | ret = iomap_dio_complete(dio); | ||
| 946 | |||
| 944 | /* | 947 | /* |
| 945 | * Try again to invalidate clean pages which might have been cached by | 948 | * Try again to invalidate clean pages which might have been cached by |
| 946 | * non-direct readahead, or faulted in by get_user_pages() if the source | 949 | * non-direct readahead, or faulted in by get_user_pages() if the source |
| @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
| 949 | * this invalidation fails, tough, the write still worked... | 952 | * this invalidation fails, tough, the write still worked... |
| 950 | */ | 953 | */ |
| 951 | if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { | 954 | if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { |
| 952 | ret = invalidate_inode_pages2_range(mapping, | 955 | int err = invalidate_inode_pages2_range(mapping, |
| 953 | iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); | 956 | start >> PAGE_SHIFT, end >> PAGE_SHIFT); |
| 954 | WARN_ON_ONCE(ret); | 957 | WARN_ON_ONCE(err); |
| 955 | } | 958 | } |
| 956 | 959 | ||
| 957 | return iomap_dio_complete(dio); | 960 | return ret; |
| 958 | 961 | ||
| 959 | out_free_dio: | 962 | out_free_dio: |
| 960 | kfree(dio); | 963 | kfree(dio); |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 4348027384f5..d0ab7e56d0b4 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more) | |||
| 1863 | 1863 | ||
| 1864 | new_sock->type = sock->type; | 1864 | new_sock->type = sock->type; |
| 1865 | new_sock->ops = sock->ops; | 1865 | new_sock->ops = sock->ops; |
| 1866 | ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); | 1866 | ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); |
| 1867 | if (ret < 0) | 1867 | if (ret < 0) |
| 1868 | goto out; | 1868 | goto out; |
| 1869 | 1869 | ||
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 1953986ee6bc..6e610a205e15 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/cred.h> | 13 | #include <linux/cred.h> |
| 14 | #include <linux/xattr.h> | 14 | #include <linux/xattr.h> |
| 15 | #include <linux/sched/signal.h> | ||
| 16 | #include "overlayfs.h" | 15 | #include "overlayfs.h" |
| 17 | #include "ovl_entry.h" | 16 | #include "ovl_entry.h" |
| 18 | 17 | ||
diff --git a/fs/timerfd.c b/fs/timerfd.c index 384fa759a563..c543cdb5f8ed 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
| @@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
| 400 | clockid != CLOCK_BOOTTIME_ALARM)) | 400 | clockid != CLOCK_BOOTTIME_ALARM)) |
| 401 | return -EINVAL; | 401 | return -EINVAL; |
| 402 | 402 | ||
| 403 | if (!capable(CAP_WAKE_ALARM) && | 403 | if ((clockid == CLOCK_REALTIME_ALARM || |
| 404 | (clockid == CLOCK_REALTIME_ALARM || | 404 | clockid == CLOCK_BOOTTIME_ALARM) && |
| 405 | clockid == CLOCK_BOOTTIME_ALARM)) | 405 | !capable(CAP_WAKE_ALARM)) |
| 406 | return -EPERM; | 406 | return -EPERM; |
| 407 | 407 | ||
| 408 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | 408 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
| @@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags, | |||
| 449 | return ret; | 449 | return ret; |
| 450 | ctx = f.file->private_data; | 450 | ctx = f.file->private_data; |
| 451 | 451 | ||
| 452 | if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { | 452 | if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) { |
| 453 | fdput(f); | 453 | fdput(f); |
| 454 | return -EPERM; | 454 | return -EPERM; |
| 455 | } | 455 | } |
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579d..1d227b0fcf49 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
| @@ -138,8 +138,6 @@ out: | |||
| 138 | * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd | 138 | * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd |
| 139 | * context. | 139 | * context. |
| 140 | * @ctx: [in] Pointer to the userfaultfd context. | 140 | * @ctx: [in] Pointer to the userfaultfd context. |
| 141 | * | ||
| 142 | * Returns: In case of success, returns not zero. | ||
| 143 | */ | 141 | */ |
| 144 | static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) | 142 | static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) |
| 145 | { | 143 | { |
| @@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, | |||
| 267 | { | 265 | { |
| 268 | struct mm_struct *mm = ctx->mm; | 266 | struct mm_struct *mm = ctx->mm; |
| 269 | pgd_t *pgd; | 267 | pgd_t *pgd; |
| 268 | p4d_t *p4d; | ||
| 270 | pud_t *pud; | 269 | pud_t *pud; |
| 271 | pmd_t *pmd, _pmd; | 270 | pmd_t *pmd, _pmd; |
| 272 | pte_t *pte; | 271 | pte_t *pte; |
| @@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, | |||
| 277 | pgd = pgd_offset(mm, address); | 276 | pgd = pgd_offset(mm, address); |
| 278 | if (!pgd_present(*pgd)) | 277 | if (!pgd_present(*pgd)) |
| 279 | goto out; | 278 | goto out; |
| 280 | pud = pud_offset(pgd, address); | 279 | p4d = p4d_offset(pgd, address); |
| 280 | if (!p4d_present(*p4d)) | ||
| 281 | goto out; | ||
| 282 | pud = pud_offset(p4d, address); | ||
| 281 | if (!pud_present(*pud)) | 283 | if (!pud_present(*pud)) |
| 282 | goto out; | 284 | goto out; |
| 283 | pmd = pmd_offset(pud, address); | 285 | pmd = pmd_offset(pud, address); |
| @@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) | |||
| 490 | * in such case. | 492 | * in such case. |
| 491 | */ | 493 | */ |
| 492 | down_read(&mm->mmap_sem); | 494 | down_read(&mm->mmap_sem); |
| 493 | ret = 0; | 495 | ret = VM_FAULT_NOPAGE; |
| 494 | } | 496 | } |
| 495 | } | 497 | } |
| 496 | 498 | ||
| @@ -527,10 +529,11 @@ out: | |||
| 527 | return ret; | 529 | return ret; |
| 528 | } | 530 | } |
| 529 | 531 | ||
| 530 | static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, | 532 | static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, |
| 531 | struct userfaultfd_wait_queue *ewq) | 533 | struct userfaultfd_wait_queue *ewq) |
| 532 | { | 534 | { |
| 533 | int ret = 0; | 535 | if (WARN_ON_ONCE(current->flags & PF_EXITING)) |
| 536 | goto out; | ||
| 534 | 537 | ||
| 535 | ewq->ctx = ctx; | 538 | ewq->ctx = ctx; |
| 536 | init_waitqueue_entry(&ewq->wq, current); | 539 | init_waitqueue_entry(&ewq->wq, current); |
| @@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, | |||
| 547 | break; | 550 | break; |
| 548 | if (ACCESS_ONCE(ctx->released) || | 551 | if (ACCESS_ONCE(ctx->released) || |
| 549 | fatal_signal_pending(current)) { | 552 | fatal_signal_pending(current)) { |
| 550 | ret = -1; | ||
| 551 | __remove_wait_queue(&ctx->event_wqh, &ewq->wq); | 553 | __remove_wait_queue(&ctx->event_wqh, &ewq->wq); |
| 554 | if (ewq->msg.event == UFFD_EVENT_FORK) { | ||
| 555 | struct userfaultfd_ctx *new; | ||
| 556 | |||
| 557 | new = (struct userfaultfd_ctx *) | ||
| 558 | (unsigned long) | ||
| 559 | ewq->msg.arg.reserved.reserved1; | ||
| 560 | |||
| 561 | userfaultfd_ctx_put(new); | ||
| 562 | } | ||
| 552 | break; | 563 | break; |
| 553 | } | 564 | } |
| 554 | 565 | ||
| @@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, | |||
| 566 | * ctx may go away after this if the userfault pseudo fd is | 577 | * ctx may go away after this if the userfault pseudo fd is |
| 567 | * already released. | 578 | * already released. |
| 568 | */ | 579 | */ |
| 569 | 580 | out: | |
| 570 | userfaultfd_ctx_put(ctx); | 581 | userfaultfd_ctx_put(ctx); |
| 571 | return ret; | ||
| 572 | } | 582 | } |
| 573 | 583 | ||
| 574 | static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, | 584 | static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, |
| @@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) | |||
| 626 | return 0; | 636 | return 0; |
| 627 | } | 637 | } |
| 628 | 638 | ||
| 629 | static int dup_fctx(struct userfaultfd_fork_ctx *fctx) | 639 | static void dup_fctx(struct userfaultfd_fork_ctx *fctx) |
| 630 | { | 640 | { |
| 631 | struct userfaultfd_ctx *ctx = fctx->orig; | 641 | struct userfaultfd_ctx *ctx = fctx->orig; |
| 632 | struct userfaultfd_wait_queue ewq; | 642 | struct userfaultfd_wait_queue ewq; |
| @@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx) | |||
| 636 | ewq.msg.event = UFFD_EVENT_FORK; | 646 | ewq.msg.event = UFFD_EVENT_FORK; |
| 637 | ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; | 647 | ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; |
| 638 | 648 | ||
| 639 | return userfaultfd_event_wait_completion(ctx, &ewq); | 649 | userfaultfd_event_wait_completion(ctx, &ewq); |
| 640 | } | 650 | } |
| 641 | 651 | ||
| 642 | void dup_userfaultfd_complete(struct list_head *fcs) | 652 | void dup_userfaultfd_complete(struct list_head *fcs) |
| 643 | { | 653 | { |
| 644 | int ret = 0; | ||
| 645 | struct userfaultfd_fork_ctx *fctx, *n; | 654 | struct userfaultfd_fork_ctx *fctx, *n; |
| 646 | 655 | ||
| 647 | list_for_each_entry_safe(fctx, n, fcs, list) { | 656 | list_for_each_entry_safe(fctx, n, fcs, list) { |
| 648 | if (!ret) | 657 | dup_fctx(fctx); |
| 649 | ret = dup_fctx(fctx); | ||
| 650 | list_del(&fctx->list); | 658 | list_del(&fctx->list); |
| 651 | kfree(fctx); | 659 | kfree(fctx); |
| 652 | } | 660 | } |
| @@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, | |||
| 689 | userfaultfd_event_wait_completion(ctx, &ewq); | 697 | userfaultfd_event_wait_completion(ctx, &ewq); |
| 690 | } | 698 | } |
| 691 | 699 | ||
| 692 | void userfaultfd_remove(struct vm_area_struct *vma, | 700 | bool userfaultfd_remove(struct vm_area_struct *vma, |
| 693 | struct vm_area_struct **prev, | ||
| 694 | unsigned long start, unsigned long end) | 701 | unsigned long start, unsigned long end) |
| 695 | { | 702 | { |
| 696 | struct mm_struct *mm = vma->vm_mm; | 703 | struct mm_struct *mm = vma->vm_mm; |
| @@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma, | |||
| 699 | 706 | ||
| 700 | ctx = vma->vm_userfaultfd_ctx.ctx; | 707 | ctx = vma->vm_userfaultfd_ctx.ctx; |
| 701 | if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) | 708 | if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) |
| 702 | return; | 709 | return true; |
| 703 | 710 | ||
| 704 | userfaultfd_ctx_get(ctx); | 711 | userfaultfd_ctx_get(ctx); |
| 705 | up_read(&mm->mmap_sem); | 712 | up_read(&mm->mmap_sem); |
| 706 | 713 | ||
| 707 | *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ | ||
| 708 | |||
| 709 | msg_init(&ewq.msg); | 714 | msg_init(&ewq.msg); |
| 710 | 715 | ||
| 711 | ewq.msg.event = UFFD_EVENT_REMOVE; | 716 | ewq.msg.event = UFFD_EVENT_REMOVE; |
| @@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma, | |||
| 714 | 719 | ||
| 715 | userfaultfd_event_wait_completion(ctx, &ewq); | 720 | userfaultfd_event_wait_completion(ctx, &ewq); |
| 716 | 721 | ||
| 717 | down_read(&mm->mmap_sem); | 722 | return false; |
| 718 | } | 723 | } |
| 719 | 724 | ||
| 720 | static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, | 725 | static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, |
| @@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) | |||
| 775 | } | 780 | } |
| 776 | } | 781 | } |
| 777 | 782 | ||
| 778 | void userfaultfd_exit(struct mm_struct *mm) | ||
| 779 | { | ||
| 780 | struct vm_area_struct *vma = mm->mmap; | ||
| 781 | |||
| 782 | /* | ||
| 783 | * We can do the vma walk without locking because the caller | ||
| 784 | * (exit_mm) knows it now has exclusive access | ||
| 785 | */ | ||
| 786 | while (vma) { | ||
| 787 | struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; | ||
| 788 | |||
| 789 | if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { | ||
| 790 | struct userfaultfd_wait_queue ewq; | ||
| 791 | |||
| 792 | userfaultfd_ctx_get(ctx); | ||
| 793 | |||
| 794 | msg_init(&ewq.msg); | ||
| 795 | ewq.msg.event = UFFD_EVENT_EXIT; | ||
| 796 | |||
| 797 | userfaultfd_event_wait_completion(ctx, &ewq); | ||
| 798 | |||
| 799 | ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; | ||
| 800 | } | ||
| 801 | |||
| 802 | vma = vma->vm_next; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | static int userfaultfd_release(struct inode *inode, struct file *file) | 783 | static int userfaultfd_release(struct inode *inode, struct file *file) |
| 807 | { | 784 | { |
| 808 | struct userfaultfd_ctx *ctx = file->private_data; | 785 | struct userfaultfd_ctx *ctx = file->private_data; |
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 2dfdc62f795e..70a5b55e0870 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
| @@ -25,24 +25,6 @@ | |||
| 25 | #include "kmem.h" | 25 | #include "kmem.h" |
| 26 | #include "xfs_message.h" | 26 | #include "xfs_message.h" |
| 27 | 27 | ||
| 28 | /* | ||
| 29 | * Greedy allocation. May fail and may return vmalloced memory. | ||
| 30 | */ | ||
| 31 | void * | ||
| 32 | kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) | ||
| 33 | { | ||
| 34 | void *ptr; | ||
| 35 | size_t kmsize = maxsize; | ||
| 36 | |||
| 37 | while (!(ptr = vzalloc(kmsize))) { | ||
| 38 | if ((kmsize >>= 1) <= minsize) | ||
| 39 | kmsize = minsize; | ||
| 40 | } | ||
| 41 | if (ptr) | ||
| 42 | *size = kmsize; | ||
| 43 | return ptr; | ||
| 44 | } | ||
| 45 | |||
| 46 | void * | 28 | void * |
| 47 | kmem_alloc(size_t size, xfs_km_flags_t flags) | 29 | kmem_alloc(size_t size, xfs_km_flags_t flags) |
| 48 | { | 30 | { |
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..f0fc84fcaac2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h | |||
| @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr) | |||
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | 71 | ||
| 72 | extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); | ||
| 73 | |||
| 74 | static inline void * | 72 | static inline void * |
| 75 | kmem_zalloc(size_t size, xfs_km_flags_t flags) | 73 | kmem_zalloc(size_t size, xfs_km_flags_t flags) |
| 76 | { | 74 | { |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a9c66d47757a..9bd104f32908 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
| @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree( | |||
| 763 | args.type = XFS_ALLOCTYPE_START_BNO; | 763 | args.type = XFS_ALLOCTYPE_START_BNO; |
| 764 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); | 764 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); |
| 765 | } else if (dfops->dop_low) { | 765 | } else if (dfops->dop_low) { |
| 766 | try_another_ag: | ||
| 767 | args.type = XFS_ALLOCTYPE_START_BNO; | 766 | args.type = XFS_ALLOCTYPE_START_BNO; |
| 767 | try_another_ag: | ||
| 768 | args.fsbno = *firstblock; | 768 | args.fsbno = *firstblock; |
| 769 | } else { | 769 | } else { |
| 770 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 770 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
| @@ -790,13 +790,17 @@ try_another_ag: | |||
| 790 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && | 790 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && |
| 791 | args.fsbno == NULLFSBLOCK && | 791 | args.fsbno == NULLFSBLOCK && |
| 792 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { | 792 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { |
| 793 | dfops->dop_low = true; | 793 | args.type = XFS_ALLOCTYPE_FIRST_AG; |
| 794 | goto try_another_ag; | 794 | goto try_another_ag; |
| 795 | } | 795 | } |
| 796 | if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { | ||
| 797 | xfs_iroot_realloc(ip, -1, whichfork); | ||
| 798 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
| 799 | return -ENOSPC; | ||
| 800 | } | ||
| 796 | /* | 801 | /* |
| 797 | * Allocation can't fail, the space was reserved. | 802 | * Allocation can't fail, the space was reserved. |
| 798 | */ | 803 | */ |
| 799 | ASSERT(args.fsbno != NULLFSBLOCK); | ||
| 800 | ASSERT(*firstblock == NULLFSBLOCK || | 804 | ASSERT(*firstblock == NULLFSBLOCK || |
| 801 | args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); | 805 | args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); |
| 802 | *firstblock = cur->bc_private.b.firstblock = args.fsbno; | 806 | *firstblock = cur->bc_private.b.firstblock = args.fsbno; |
| @@ -4150,6 +4154,19 @@ xfs_bmapi_read( | |||
| 4150 | return 0; | 4154 | return 0; |
| 4151 | } | 4155 | } |
| 4152 | 4156 | ||
| 4157 | /* | ||
| 4158 | * Add a delayed allocation extent to an inode. Blocks are reserved from the | ||
| 4159 | * global pool and the extent inserted into the inode in-core extent tree. | ||
| 4160 | * | ||
| 4161 | * On entry, got refers to the first extent beyond the offset of the extent to | ||
| 4162 | * allocate or eof is specified if no such extent exists. On return, got refers | ||
| 4163 | * to the extent record that was inserted to the inode fork. | ||
| 4164 | * | ||
| 4165 | * Note that the allocated extent may have been merged with contiguous extents | ||
| 4166 | * during insertion into the inode fork. Thus, got does not reflect the current | ||
| 4167 | * state of the inode fork on return. If necessary, the caller can use lastx to | ||
| 4168 | * look up the updated record in the inode fork. | ||
| 4169 | */ | ||
| 4153 | int | 4170 | int |
| 4154 | xfs_bmapi_reserve_delalloc( | 4171 | xfs_bmapi_reserve_delalloc( |
| 4155 | struct xfs_inode *ip, | 4172 | struct xfs_inode *ip, |
| @@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc( | |||
| 4236 | got->br_startblock = nullstartblock(indlen); | 4253 | got->br_startblock = nullstartblock(indlen); |
| 4237 | got->br_blockcount = alen; | 4254 | got->br_blockcount = alen; |
| 4238 | got->br_state = XFS_EXT_NORM; | 4255 | got->br_state = XFS_EXT_NORM; |
| 4239 | xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); | ||
| 4240 | 4256 | ||
| 4241 | /* | 4257 | xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); |
| 4242 | * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay | ||
| 4243 | * might have merged it into one of the neighbouring ones. | ||
| 4244 | */ | ||
| 4245 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); | ||
| 4246 | 4258 | ||
| 4247 | /* | 4259 | /* |
| 4248 | * Tag the inode if blocks were preallocated. Note that COW fork | 4260 | * Tag the inode if blocks were preallocated. Note that COW fork |
| @@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc( | |||
| 4254 | if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) | 4266 | if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) |
| 4255 | xfs_inode_set_cowblocks_tag(ip); | 4267 | xfs_inode_set_cowblocks_tag(ip); |
| 4256 | 4268 | ||
| 4257 | ASSERT(got->br_startoff <= aoff); | ||
| 4258 | ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); | ||
| 4259 | ASSERT(isnullstartblock(got->br_startblock)); | ||
| 4260 | ASSERT(got->br_state == XFS_EXT_NORM); | ||
| 4261 | return 0; | 4269 | return 0; |
| 4262 | 4270 | ||
| 4263 | out_unreserve_blocks: | 4271 | out_unreserve_blocks: |
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f93072b58a58..fd55db479385 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c | |||
| @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block( | |||
| 447 | 447 | ||
| 448 | if (args.fsbno == NULLFSBLOCK) { | 448 | if (args.fsbno == NULLFSBLOCK) { |
| 449 | args.fsbno = be64_to_cpu(start->l); | 449 | args.fsbno = be64_to_cpu(start->l); |
| 450 | try_another_ag: | ||
| 451 | args.type = XFS_ALLOCTYPE_START_BNO; | 450 | args.type = XFS_ALLOCTYPE_START_BNO; |
| 451 | try_another_ag: | ||
| 452 | /* | 452 | /* |
| 453 | * Make sure there is sufficient room left in the AG to | 453 | * Make sure there is sufficient room left in the AG to |
| 454 | * complete a full tree split for an extent insert. If | 454 | * complete a full tree split for an extent insert. If |
| @@ -488,8 +488,8 @@ try_another_ag: | |||
| 488 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && | 488 | if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && |
| 489 | args.fsbno == NULLFSBLOCK && | 489 | args.fsbno == NULLFSBLOCK && |
| 490 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { | 490 | args.type == XFS_ALLOCTYPE_NEAR_BNO) { |
| 491 | cur->bc_private.b.dfops->dop_low = true; | ||
| 492 | args.fsbno = cur->bc_private.b.firstblock; | 491 | args.fsbno = cur->bc_private.b.firstblock; |
| 492 | args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
| 493 | goto try_another_ag; | 493 | goto try_another_ag; |
| 494 | } | 494 | } |
| 495 | 495 | ||
| @@ -506,7 +506,7 @@ try_another_ag: | |||
| 506 | goto error0; | 506 | goto error0; |
| 507 | cur->bc_private.b.dfops->dop_low = true; | 507 | cur->bc_private.b.dfops->dop_low = true; |
| 508 | } | 508 | } |
| 509 | if (args.fsbno == NULLFSBLOCK) { | 509 | if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { |
| 510 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | 510 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); |
| 511 | *stat = 0; | 511 | *stat = 0; |
| 512 | return 0; | 512 | return 0; |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf65a9ea8642..61494295d92f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -274,54 +274,49 @@ xfs_end_io( | |||
| 274 | struct xfs_ioend *ioend = | 274 | struct xfs_ioend *ioend = |
| 275 | container_of(work, struct xfs_ioend, io_work); | 275 | container_of(work, struct xfs_ioend, io_work); |
| 276 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 276 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
| 277 | xfs_off_t offset = ioend->io_offset; | ||
| 278 | size_t size = ioend->io_size; | ||
| 277 | int error = ioend->io_bio->bi_error; | 279 | int error = ioend->io_bio->bi_error; |
| 278 | 280 | ||
| 279 | /* | 281 | /* |
| 280 | * Set an error if the mount has shut down and proceed with end I/O | 282 | * Just clean up the in-memory strutures if the fs has been shut down. |
| 281 | * processing so it can perform whatever cleanups are necessary. | ||
| 282 | */ | 283 | */ |
| 283 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 284 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
| 284 | error = -EIO; | 285 | error = -EIO; |
| 286 | goto done; | ||
| 287 | } | ||
| 285 | 288 | ||
| 286 | /* | 289 | /* |
| 287 | * For a CoW extent, we need to move the mapping from the CoW fork | 290 | * Clean up any COW blocks on an I/O error. |
| 288 | * to the data fork. If instead an error happened, just dump the | ||
| 289 | * new blocks. | ||
| 290 | */ | 291 | */ |
| 291 | if (ioend->io_type == XFS_IO_COW) { | 292 | if (unlikely(error)) { |
| 292 | if (error) | 293 | switch (ioend->io_type) { |
| 293 | goto done; | 294 | case XFS_IO_COW: |
| 294 | if (ioend->io_bio->bi_error) { | 295 | xfs_reflink_cancel_cow_range(ip, offset, size, true); |
| 295 | error = xfs_reflink_cancel_cow_range(ip, | 296 | break; |
| 296 | ioend->io_offset, ioend->io_size); | ||
| 297 | goto done; | ||
| 298 | } | 297 | } |
| 299 | error = xfs_reflink_end_cow(ip, ioend->io_offset, | 298 | |
| 300 | ioend->io_size); | 299 | goto done; |
| 301 | if (error) | ||
| 302 | goto done; | ||
| 303 | } | 300 | } |
| 304 | 301 | ||
| 305 | /* | 302 | /* |
| 306 | * For unwritten extents we need to issue transactions to convert a | 303 | * Success: commit the COW or unwritten blocks if needed. |
| 307 | * range to normal written extents after the data I/O has finished. | ||
| 308 | * Detecting and handling completion IO errors is done individually | ||
| 309 | * for each case as different cleanup operations need to be performed | ||
| 310 | * on error. | ||
| 311 | */ | 304 | */ |
| 312 | if (ioend->io_type == XFS_IO_UNWRITTEN) { | 305 | switch (ioend->io_type) { |
| 313 | if (error) | 306 | case XFS_IO_COW: |
| 314 | goto done; | 307 | error = xfs_reflink_end_cow(ip, offset, size); |
| 315 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 308 | break; |
| 316 | ioend->io_size); | 309 | case XFS_IO_UNWRITTEN: |
| 317 | } else if (ioend->io_append_trans) { | 310 | error = xfs_iomap_write_unwritten(ip, offset, size); |
| 318 | error = xfs_setfilesize_ioend(ioend, error); | 311 | break; |
| 319 | } else { | 312 | default: |
| 320 | ASSERT(!xfs_ioend_is_append(ioend) || | 313 | ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); |
| 321 | ioend->io_type == XFS_IO_COW); | 314 | break; |
| 322 | } | 315 | } |
| 323 | 316 | ||
| 324 | done: | 317 | done: |
| 318 | if (ioend->io_append_trans) | ||
| 319 | error = xfs_setfilesize_ioend(ioend, error); | ||
| 325 | xfs_destroy_ioend(ioend, error); | 320 | xfs_destroy_ioend(ioend, error); |
| 326 | } | 321 | } |
| 327 | 322 | ||
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7234b9748c36..3531f8f72fa5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
| @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks( | |||
| 1608 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 1608 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
| 1609 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | 1609 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); |
| 1610 | 1610 | ||
| 1611 | ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); | 1611 | ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); |
| 1612 | 1612 | ||
| 1613 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); | 1613 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
| 1614 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 1614 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edfa6a55b064..7eaf1ef74e3c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -1615,7 +1615,7 @@ xfs_itruncate_extents( | |||
| 1615 | 1615 | ||
| 1616 | /* Remove all pending CoW reservations. */ | 1616 | /* Remove all pending CoW reservations. */ |
| 1617 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, | 1617 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, |
| 1618 | last_block); | 1618 | last_block, true); |
| 1619 | if (error) | 1619 | if (error) |
| 1620 | goto out; | 1620 | goto out; |
| 1621 | 1621 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 41662fb14e87..288ee5b840d7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -630,6 +630,11 @@ retry: | |||
| 630 | goto out_unlock; | 630 | goto out_unlock; |
| 631 | } | 631 | } |
| 632 | 632 | ||
| 633 | /* | ||
| 634 | * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch | ||
| 635 | * them out if the write happens to fail. | ||
| 636 | */ | ||
| 637 | iomap->flags = IOMAP_F_NEW; | ||
| 633 | trace_xfs_iomap_alloc(ip, offset, count, 0, &got); | 638 | trace_xfs_iomap_alloc(ip, offset, count, 0, &got); |
| 634 | done: | 639 | done: |
| 635 | if (isnullstartblock(got.br_startblock)) | 640 | if (isnullstartblock(got.br_startblock)) |
| @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc( | |||
| 1071 | struct xfs_inode *ip, | 1076 | struct xfs_inode *ip, |
| 1072 | loff_t offset, | 1077 | loff_t offset, |
| 1073 | loff_t length, | 1078 | loff_t length, |
| 1074 | ssize_t written) | 1079 | ssize_t written, |
| 1080 | struct iomap *iomap) | ||
| 1075 | { | 1081 | { |
| 1076 | struct xfs_mount *mp = ip->i_mount; | 1082 | struct xfs_mount *mp = ip->i_mount; |
| 1077 | xfs_fileoff_t start_fsb; | 1083 | xfs_fileoff_t start_fsb; |
| 1078 | xfs_fileoff_t end_fsb; | 1084 | xfs_fileoff_t end_fsb; |
| 1079 | int error = 0; | 1085 | int error = 0; |
| 1080 | 1086 | ||
| 1081 | /* behave as if the write failed if drop writes is enabled */ | 1087 | /* |
| 1082 | if (xfs_mp_drop_writes(mp)) | 1088 | * Behave as if the write failed if drop writes is enabled. Set the NEW |
| 1089 | * flag to force delalloc cleanup. | ||
| 1090 | */ | ||
| 1091 | if (xfs_mp_drop_writes(mp)) { | ||
| 1092 | iomap->flags |= IOMAP_F_NEW; | ||
| 1083 | written = 0; | 1093 | written = 0; |
| 1094 | } | ||
| 1084 | 1095 | ||
| 1085 | /* | 1096 | /* |
| 1086 | * start_fsb refers to the first unused block after a short write. If | 1097 | * start_fsb refers to the first unused block after a short write. If |
| @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc( | |||
| 1094 | end_fsb = XFS_B_TO_FSB(mp, offset + length); | 1105 | end_fsb = XFS_B_TO_FSB(mp, offset + length); |
| 1095 | 1106 | ||
| 1096 | /* | 1107 | /* |
| 1097 | * Trim back delalloc blocks if we didn't manage to write the whole | 1108 | * Trim delalloc blocks if they were allocated by this write and we |
| 1098 | * range reserved. | 1109 | * didn't manage to write the whole range. |
| 1099 | * | 1110 | * |
| 1100 | * We don't need to care about racing delalloc as we hold i_mutex | 1111 | * We don't need to care about racing delalloc as we hold i_mutex |
| 1101 | * across the reserve/allocate/unreserve calls. If there are delalloc | 1112 | * across the reserve/allocate/unreserve calls. If there are delalloc |
| 1102 | * blocks in the range, they are ours. | 1113 | * blocks in the range, they are ours. |
| 1103 | */ | 1114 | */ |
| 1104 | if (start_fsb < end_fsb) { | 1115 | if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { |
| 1105 | truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), | 1116 | truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), |
| 1106 | XFS_FSB_TO_B(mp, end_fsb) - 1); | 1117 | XFS_FSB_TO_B(mp, end_fsb) - 1); |
| 1107 | 1118 | ||
| @@ -1131,7 +1142,7 @@ xfs_file_iomap_end( | |||
| 1131 | { | 1142 | { |
| 1132 | if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) | 1143 | if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) |
| 1133 | return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, | 1144 | return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, |
| 1134 | length, written); | 1145 | length, written, iomap); |
| 1135 | return 0; | 1146 | return 0; |
| 1136 | } | 1147 | } |
| 1137 | 1148 | ||
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c17..2a6d9b1558e0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
| @@ -361,7 +361,6 @@ xfs_bulkstat( | |||
| 361 | xfs_agino_t agino; /* inode # in allocation group */ | 361 | xfs_agino_t agino; /* inode # in allocation group */ |
| 362 | xfs_agnumber_t agno; /* allocation group number */ | 362 | xfs_agnumber_t agno; /* allocation group number */ |
| 363 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 363 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
| 364 | size_t irbsize; /* size of irec buffer in bytes */ | ||
| 365 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ | 364 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ |
| 366 | int nirbuf; /* size of irbuf */ | 365 | int nirbuf; /* size of irbuf */ |
| 367 | int ubcount; /* size of user's buffer */ | 366 | int ubcount; /* size of user's buffer */ |
| @@ -388,11 +387,10 @@ xfs_bulkstat( | |||
| 388 | *ubcountp = 0; | 387 | *ubcountp = 0; |
| 389 | *done = 0; | 388 | *done = 0; |
| 390 | 389 | ||
| 391 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); | 390 | irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); |
| 392 | if (!irbuf) | 391 | if (!irbuf) |
| 393 | return -ENOMEM; | 392 | return -ENOMEM; |
| 394 | 393 | nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); | |
| 395 | nirbuf = irbsize / sizeof(*irbuf); | ||
| 396 | 394 | ||
| 397 | /* | 395 | /* |
| 398 | * Loop over the allocation groups, starting from the last | 396 | * Loop over the allocation groups, starting from the last |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 450bde68bb75..688ebff1f663 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -513,8 +513,7 @@ STATIC void | |||
| 513 | xfs_set_inoalignment(xfs_mount_t *mp) | 513 | xfs_set_inoalignment(xfs_mount_t *mp) |
| 514 | { | 514 | { |
| 515 | if (xfs_sb_version_hasalign(&mp->m_sb) && | 515 | if (xfs_sb_version_hasalign(&mp->m_sb) && |
| 516 | mp->m_sb.sb_inoalignmt >= | 516 | mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) |
| 517 | XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) | ||
| 518 | mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; | 517 | mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; |
| 519 | else | 518 | else |
| 520 | mp->m_inoalign_mask = 0; | 519 | mp->m_inoalign_mask = 0; |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index da6d08fb359c..4a84c5ea266d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
| @@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow( | |||
| 548 | } | 548 | } |
| 549 | 549 | ||
| 550 | /* | 550 | /* |
| 551 | * Cancel all pending CoW reservations for some block range of an inode. | 551 | * Cancel CoW reservations for some block range of an inode. |
| 552 | * | ||
| 553 | * If cancel_real is true this function cancels all COW fork extents for the | ||
| 554 | * inode; if cancel_real is false, real extents are not cleared. | ||
| 552 | */ | 555 | */ |
| 553 | int | 556 | int |
| 554 | xfs_reflink_cancel_cow_blocks( | 557 | xfs_reflink_cancel_cow_blocks( |
| 555 | struct xfs_inode *ip, | 558 | struct xfs_inode *ip, |
| 556 | struct xfs_trans **tpp, | 559 | struct xfs_trans **tpp, |
| 557 | xfs_fileoff_t offset_fsb, | 560 | xfs_fileoff_t offset_fsb, |
| 558 | xfs_fileoff_t end_fsb) | 561 | xfs_fileoff_t end_fsb, |
| 562 | bool cancel_real) | ||
| 559 | { | 563 | { |
| 560 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | 564 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
| 561 | struct xfs_bmbt_irec got, del; | 565 | struct xfs_bmbt_irec got, del; |
| @@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks( | |||
| 579 | &idx, &got, &del); | 583 | &idx, &got, &del); |
| 580 | if (error) | 584 | if (error) |
| 581 | break; | 585 | break; |
| 582 | } else { | 586 | } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { |
| 583 | xfs_trans_ijoin(*tpp, ip, 0); | 587 | xfs_trans_ijoin(*tpp, ip, 0); |
| 584 | xfs_defer_init(&dfops, &firstfsb); | 588 | xfs_defer_init(&dfops, &firstfsb); |
| 585 | 589 | ||
| @@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks( | |||
| 621 | } | 625 | } |
| 622 | 626 | ||
| 623 | /* | 627 | /* |
| 624 | * Cancel all pending CoW reservations for some byte range of an inode. | 628 | * Cancel CoW reservations for some byte range of an inode. |
| 629 | * | ||
| 630 | * If cancel_real is true this function cancels all COW fork extents for the | ||
| 631 | * inode; if cancel_real is false, real extents are not cleared. | ||
| 625 | */ | 632 | */ |
| 626 | int | 633 | int |
| 627 | xfs_reflink_cancel_cow_range( | 634 | xfs_reflink_cancel_cow_range( |
| 628 | struct xfs_inode *ip, | 635 | struct xfs_inode *ip, |
| 629 | xfs_off_t offset, | 636 | xfs_off_t offset, |
| 630 | xfs_off_t count) | 637 | xfs_off_t count, |
| 638 | bool cancel_real) | ||
| 631 | { | 639 | { |
| 632 | struct xfs_trans *tp; | 640 | struct xfs_trans *tp; |
| 633 | xfs_fileoff_t offset_fsb; | 641 | xfs_fileoff_t offset_fsb; |
| @@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range( | |||
| 653 | xfs_trans_ijoin(tp, ip, 0); | 661 | xfs_trans_ijoin(tp, ip, 0); |
| 654 | 662 | ||
| 655 | /* Scrape out the old CoW reservations */ | 663 | /* Scrape out the old CoW reservations */ |
| 656 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); | 664 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, |
| 665 | cancel_real); | ||
| 657 | if (error) | 666 | if (error) |
| 658 | goto out_cancel; | 667 | goto out_cancel; |
| 659 | 668 | ||
| @@ -1450,7 +1459,7 @@ next: | |||
| 1450 | * We didn't find any shared blocks so turn off the reflink flag. | 1459 | * We didn't find any shared blocks so turn off the reflink flag. |
| 1451 | * First, get rid of any leftover CoW mappings. | 1460 | * First, get rid of any leftover CoW mappings. |
| 1452 | */ | 1461 | */ |
| 1453 | error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); | 1462 | error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); |
| 1454 | if (error) | 1463 | if (error) |
| 1455 | return error; | 1464 | return error; |
| 1456 | 1465 | ||
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 33ac9b8db683..d29a7967f029 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h | |||
| @@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, | |||
| 39 | 39 | ||
| 40 | extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, | 40 | extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, |
| 41 | struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, | 41 | struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, |
| 42 | xfs_fileoff_t end_fsb); | 42 | xfs_fileoff_t end_fsb, bool cancel_real); |
| 43 | extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, | 43 | extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, |
| 44 | xfs_off_t count); | 44 | xfs_off_t count, bool cancel_real); |
| 45 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, | 45 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, |
| 46 | xfs_off_t count); | 46 | xfs_off_t count); |
| 47 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); | 47 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 890862f2447c..685c042a120f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
| @@ -953,7 +953,7 @@ xfs_fs_destroy_inode( | |||
| 953 | XFS_STATS_INC(ip->i_mount, vn_remove); | 953 | XFS_STATS_INC(ip->i_mount, vn_remove); |
| 954 | 954 | ||
| 955 | if (xfs_is_reflink_inode(ip)) { | 955 | if (xfs_is_reflink_inode(ip)) { |
| 956 | error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); | 956 | error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); |
| 957 | if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) | 957 | if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| 958 | xfs_warn(ip->i_mount, | 958 | xfs_warn(ip->i_mount, |
| 959 | "Error %d while evicting CoW blocks for inode %llu.", | 959 | "Error %d while evicting CoW blocks for inode %llu.", |
