Diffstat (limited to 'fs')

-rw-r--r--  fs/afs/rxrpc.c                             |  11
-rw-r--r--  fs/dax.c                                   |  60
-rw-r--r--  fs/exec.c                                  |   5
-rw-r--r--  fs/iomap.c                                 |  53
-rw-r--r--  fs/nfs/callback_proc.c                     |  22
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.c     |  21
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.h     |   4
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayoutdev.c  |  19
-rw-r--r--  fs/nfs/nfs42proc.c                         |  19
-rw-r--r--  fs/nfs/nfs4_fs.h                           |   2
-rw-r--r--  fs/nfs/nfs4state.c                         |  16
-rw-r--r--  fs/nilfs2/btnode.c                         |   4
-rw-r--r--  fs/read_write.c                            |  15
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c                   |   5
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.c           |  11
-rw-r--r--  fs/xfs/xfs_bmap_util.c                     |  10
-rw-r--r--  fs/xfs/xfs_bmap_util.h                     |   3
-rw-r--r--  fs/xfs/xfs_buf_item.c                      |  28
-rw-r--r--  fs/xfs/xfs_file.c                          |   2
-rw-r--r--  fs/xfs/xfs_reflink.c                       |  18
-rw-r--r--  fs/xfs/xfs_trace.h                         |   5

21 files changed, 219 insertions, 114 deletions
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 59970886690f..a7b44863d502 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -576,6 +576,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 {
 	signed long rtt2, timeout;
 	long ret;
+	bool stalled = false;
 	u64 rtt;
 	u32 life, last_life;
 
@@ -609,12 +610,20 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 
 		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
 		if (timeout == 0 &&
-		    life == last_life && signal_pending(current))
+		    life == last_life && signal_pending(current)) {
+			if (stalled)
 				break;
+			__set_current_state(TASK_RUNNING);
+			rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
+			timeout = rtt2;
+			stalled = true;
+			continue;
+		}
 
 		if (life != last_life) {
 			timeout = rtt2;
 			last_life = life;
+			stalled = false;
 		}
 
 		timeout = schedule_timeout(timeout);
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,12 +98,6 @@ static void *dax_make_entry(pfn_t pfn, unsigned long flags)
 	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
 }
 
-static void *dax_make_page_entry(struct page *page)
-{
-	pfn_t pfn = page_to_pfn_t(page);
-	return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0);
-}
-
 static bool dax_is_locked(void *entry)
 {
 	return xa_to_value(entry) & DAX_LOCKED;
@@ -116,12 +110,12 @@ static unsigned int dax_entry_order(void *entry)
 	return 0;
 }
 
-static int dax_is_pmd_entry(void *entry)
+static unsigned long dax_is_pmd_entry(void *entry)
 {
 	return xa_to_value(entry) & DAX_PMD;
 }
 
-static int dax_is_pte_entry(void *entry)
+static bool dax_is_pte_entry(void *entry)
 {
 	return !(xa_to_value(entry) & DAX_PMD);
 }
@@ -222,9 +216,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		entry = xas_load(xas);
-		if (!entry || xa_is_internal(entry) ||
-				WARN_ON_ONCE(!xa_is_value(entry)) ||
+		entry = xas_find_conflict(xas);
+		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
 
@@ -255,6 +248,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
 {
 	void *old;
 
+	BUG_ON(dax_is_locked(entry));
 	xas_reset(xas);
 	xas_lock_irq(xas);
 	old = xas_store(xas, entry);
@@ -352,16 +346,27 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+/*
+ * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
+ * @page: The page whose entry we want to lock
+ *
+ * Context: Process context.
+ * Return: %true if the entry was locked or does not need to be locked.
+ */
 bool dax_lock_mapping_entry(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
+	bool locked;
 
+	/* Ensure page->mapping isn't freed while we look at it */
+	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
+		locked = false;
 		if (!dax_mapping(mapping))
-			return false;
+			break;
 
 		/*
 		 * In the device-dax case there's no need to lock, a
@@ -370,8 +375,9 @@ bool dax_lock_mapping_entry(struct page *page)
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
+		locked = true;
 		if (S_ISCHR(mapping->host->i_mode))
-			return true;
+			break;
 
 		xas.xa = &mapping->i_pages;
 		xas_lock_irq(&xas);
@@ -382,28 +388,35 @@ bool dax_lock_mapping_entry(struct page *page)
 		xas_set(&xas, page->index);
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
+			rcu_read_unlock();
 			entry = get_unlocked_entry(&xas);
-			/* Did the page move while we slept? */
-			if (dax_to_pfn(entry) != page_to_pfn(page)) {
-				xas_unlock_irq(&xas);
+			xas_unlock_irq(&xas);
+			put_unlocked_entry(&xas, entry);
+			rcu_read_lock();
 			continue;
-			}
 		}
 		dax_lock_entry(&xas, entry);
 		xas_unlock_irq(&xas);
-		return true;
+		break;
 	}
+	rcu_read_unlock();
+	return locked;
 }
 
 void dax_unlock_mapping_entry(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
+	void *entry;
 
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	dax_unlock_entry(&xas, dax_make_page_entry(page));
+	rcu_read_lock();
+	entry = xas_load(&xas);
+	rcu_read_unlock();
+	entry = dax_make_entry(page_to_pfn_t(page), dax_is_pmd_entry(entry));
+	dax_unlock_entry(&xas, entry);
 }
 
 /*
@@ -445,11 +458,9 @@ static void *grab_mapping_entry(struct xa_state *xas,
 retry:
 	xas_lock_irq(xas);
 	entry = get_unlocked_entry(xas);
-	if (xa_is_internal(entry))
-		goto fallback;
 
 	if (entry) {
-		if (WARN_ON_ONCE(!xa_is_value(entry))) {
+		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
@@ -1628,8 +1639,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
 	    (order == 0 && !dax_is_pte_entry(entry)) ||
-	    (order == PMD_ORDER && (xa_is_internal(entry) ||
-				    !dax_is_pmd_entry(entry)))) {
+	    (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,7 @@
 #include <linux/oom.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
+#include <linux/freezer.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1083,7 +1084,7 @@ static int de_thread(struct task_struct *tsk)
 	while (sig->notify_count) {
 		__set_current_state(TASK_KILLABLE);
 		spin_unlock_irq(lock);
-		schedule();
+		freezable_schedule();
 		if (unlikely(__fatal_signal_pending(tsk)))
 			goto killed;
 		spin_lock_irq(lock);
@@ -1111,7 +1112,7 @@ static int de_thread(struct task_struct *tsk)
 		__set_current_state(TASK_KILLABLE);
 		write_unlock_irq(&tasklist_lock);
 		cgroup_threadgroup_change_end(tsk);
-		schedule();
+		freezable_schedule();
 		if (unlikely(__fatal_signal_pending(tsk)))
 			goto killed;
 	}
diff --git a/fs/iomap.c b/fs/iomap.c
index 64ce240217a1..3ffb776fbebe 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -142,13 +142,14 @@ static void
 iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
 		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
 {
+	loff_t orig_pos = *pos;
+	loff_t isize = i_size_read(inode);
 	unsigned block_bits = inode->i_blkbits;
 	unsigned block_size = (1 << block_bits);
 	unsigned poff = offset_in_page(*pos);
 	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
 	unsigned first = poff >> block_bits;
 	unsigned last = (poff + plen - 1) >> block_bits;
-	unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;
 
 	/*
 	 * If the block size is smaller than the page size we need to check the
@@ -183,8 +184,12 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
 	 * handle both halves separately so that we properly zero data in the
 	 * page cache for blocks that are entirely outside of i_size.
 	 */
-	if (first <= end && last > end)
-		plen -= (last - end) * block_size;
+	if (orig_pos <= isize && orig_pos + length > isize) {
+		unsigned end = offset_in_page(isize - 1) >> block_bits;
+
+		if (first <= end && last > end)
+			plen -= (last - end) * block_size;
+	}
 
 	*offp = poff;
 	*lenp = plen;
@@ -1580,7 +1585,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct bio *bio;
 	bool need_zeroout = false;
 	bool use_fua = false;
-	int nr_pages, ret;
+	int nr_pages, ret = 0;
 	size_t copied = 0;
 
 	if ((pos | length | align) & ((1 << blkbits) - 1))
@@ -1596,12 +1601,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 
 	if (iomap->flags & IOMAP_F_NEW) {
 		need_zeroout = true;
-	} else {
+	} else if (iomap->type == IOMAP_MAPPED) {
 		/*
-		 * Use a FUA write if we need datasync semantics, this
-		 * is a pure data IO that doesn't require any metadata
-		 * updates and the underlying device supports FUA. This
-		 * allows us to avoid cache flushes on IO completion.
+		 * Use a FUA write if we need datasync semantics, this is a pure
+		 * data IO that doesn't require any metadata updates (including
+		 * after IO completion such as unwritten extent conversion) and
+		 * the underlying device supports FUA. This allows us to avoid
+		 * cache flushes on IO completion.
 		 */
 		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
 		    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
@@ -1644,8 +1650,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 
 		ret = bio_iov_iter_get_pages(bio, &iter);
 		if (unlikely(ret)) {
+			/*
+			 * We have to stop part way through an IO. We must fall
+			 * through to the sub-block tail zeroing here, otherwise
+			 * this short IO may expose stale data in the tail of
+			 * the block we haven't written data to.
+			 */
 			bio_put(bio);
-			return copied ? copied : ret;
+			goto zero_tail;
 		}
 
 		n = bio->bi_iter.bi_size;
@@ -1676,13 +1688,21 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 		dio->submit.cookie = submit_bio(bio);
 	} while (nr_pages);
 
-	if (need_zeroout) {
+	/*
+	 * We need to zeroout the tail of a sub-block write if the extent type
+	 * requires zeroing or the write extends beyond EOF. If we don't zero
+	 * the block tail in the latter case, we can expose stale data via mmap
+	 * reads of the EOF block.
+	 */
+zero_tail:
+	if (need_zeroout ||
+	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
 		/* zero out from the end of the write to the end of the block */
 		pad = pos & (fs_block_size - 1);
 		if (pad)
 			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
 	}
-	return copied;
+	return copied ? copied : ret;
 }
 
 static loff_t
@@ -1857,6 +1877,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				dio->wait_for_completion = true;
 				ret = 0;
 			}
+
+			/*
+			 * Splicing to pipes can fail on a full pipe. We have to
+			 * swallow this to make it look like a short IO
+			 * otherwise the higher splice layers will completely
+			 * mishandle the error and stop moving data.
+			 */
+			if (ret == -EFAULT)
+				ret = 0;
 			break;
 		}
 		pos += ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7b861bbc0b43..315967354954 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -686,20 +686,24 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 {
 	struct cb_offloadargs *args = data;
 	struct nfs_server *server;
-	struct nfs4_copy_state *copy;
+	struct nfs4_copy_state *copy, *tmp_copy;
 	bool found = false;
 
+	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+	if (!copy)
+		return htonl(NFS4ERR_SERVERFAULT);
+
 	spin_lock(&cps->clp->cl_lock);
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
 				client_link) {
-		list_for_each_entry(copy, &server->ss_copies, copies) {
+		list_for_each_entry(tmp_copy, &server->ss_copies, copies) {
 			if (memcmp(args->coa_stateid.other,
-					copy->stateid.other,
+					tmp_copy->stateid.other,
 					sizeof(args->coa_stateid.other)))
 				continue;
-			nfs4_copy_cb_args(copy, args);
-			complete(&copy->completion);
+			nfs4_copy_cb_args(tmp_copy, args);
+			complete(&tmp_copy->completion);
 			found = true;
 			goto out;
 		}
@@ -707,15 +711,11 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 out:
 	rcu_read_unlock();
 	if (!found) {
-		copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-		if (!copy) {
-			spin_unlock(&cps->clp->cl_lock);
-			return htonl(NFS4ERR_SERVERFAULT);
-		}
 		memcpy(&copy->stateid, &args->coa_stateid, NFS4_STATEID_SIZE);
 		nfs4_copy_cb_args(copy, args);
 		list_add_tail(&copy->copies, &cps->clp->pending_cb_stateids);
-	}
+	} else
+		kfree(copy);
 	spin_unlock(&cps->clp->cl_lock);
 
 	return 0;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 86bcba40ca61..74b36ed883ca 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1361,12 +1361,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
 			task))
 		return;
 
-	if (ff_layout_read_prepare_common(task, hdr))
-		return;
-
-	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
-			hdr->args.lock_context, FMODE_READ) == -EIO)
-		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+	ff_layout_read_prepare_common(task, hdr);
 }
 
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
@@ -1542,12 +1537,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
 			task))
 		return;
 
-	if (ff_layout_write_prepare_common(task, hdr))
-		return;
-
-	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
-			hdr->args.lock_context, FMODE_WRITE) == -EIO)
-		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+	ff_layout_write_prepare_common(task, hdr);
 }
 
 static void ff_layout_write_call_done(struct rpc_task *task, void *data)
@@ -1742,6 +1732,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
 	if (fh)
 		hdr->args.fh = fh;
+
+	if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+		goto out_failed;
+
 	/*
 	 * Note that if we ever decide to split across DSes,
 	 * then we may need to handle dense-like offsets.
@@ -1804,6 +1798,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	if (fh)
 		hdr->args.fh = fh;
 
+	if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+		goto out_failed;
+
 	/*
 	 * Note that if we ever decide to split across DSes,
 	 * then we may need to handle dense-like offsets.
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 411798346e48..de50a342d5a5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -215,6 +215,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
 		unsigned int maxnum);
 struct nfs_fh *
 nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+		u32 mirror_idx,
+		nfs4_stateid *stateid);
 
 struct nfs4_pnfs_ds *
 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 74d8d5352438..d23347389626 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -370,6 +370,25 @@ out:
 	return fh;
 }
 
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+		u32 mirror_idx,
+		nfs4_stateid *stateid)
+{
+	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
+
+	if (!ff_layout_mirror_valid(lseg, mirror, false)) {
+		pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
+			__func__, mirror_idx);
+		goto out;
+	}
+
+	nfs4_stateid_copy(stateid, &mirror->stateid);
+	return 1;
+out:
+	return 0;
+}
+
 /**
  * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
  * @lseg: the layout segment we're operating on
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index ac5b784a1de0..fed06fd9998d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -137,31 +137,32 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 			     struct file *dst,
 			     nfs4_stateid *src_stateid)
 {
-	struct nfs4_copy_state *copy;
+	struct nfs4_copy_state *copy, *tmp_copy;
 	int status = NFS4_OK;
 	bool found_pending = false;
 	struct nfs_open_context *ctx = nfs_file_open_context(dst);
 
+	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+	if (!copy)
+		return -ENOMEM;
+
 	spin_lock(&server->nfs_client->cl_lock);
-	list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
+	list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids,
 				copies) {
-		if (memcmp(&res->write_res.stateid, &copy->stateid,
+		if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
 				NFS4_STATEID_SIZE))
 			continue;
 		found_pending = true;
-		list_del(&copy->copies);
+		list_del(&tmp_copy->copies);
 		break;
 	}
 	if (found_pending) {
 		spin_unlock(&server->nfs_client->cl_lock);
+		kfree(copy);
+		copy = tmp_copy;
 		goto out;
 	}
 
-	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-	if (!copy) {
-		spin_unlock(&server->nfs_client->cl_lock);
-		return -ENOMEM;
-	}
 	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
 	init_completion(&copy->completion);
 	copy->parent_state = ctx->state;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d59c9655ec4..1b994b527518 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -41,6 +41,8 @@ enum nfs4_client_state {
 	NFS4CLNT_MOVED,
 	NFS4CLNT_LEASE_MOVED,
 	NFS4CLNT_DELEGATION_EXPIRED,
+	NFS4CLNT_RUN_MANAGER,
+	NFS4CLNT_DELEGRETURN_RUNNING,
 };
 
 #define NFS4_RENEW_TIMEOUT 0x01
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ffea57885394..d8decf2ec48f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1210,6 +1210,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
 	struct task_struct *task;
 	char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
 
+	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
 	if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 		return;
 	__module_get(THIS_MODULE);
@@ -2503,6 +2504,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 
 	/* Ensure exclusive access to NFSv4 state */
 	do {
+		clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
 		if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
 			section = "purge state";
 			status = nfs4_purge_lease(clp);
@@ -2593,14 +2595,18 @@ static void nfs4_state_manager(struct nfs_client *clp)
 		}
 
 		nfs4_end_drain_session(clp);
-		if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
-			nfs_client_return_marked_delegations(clp);
-			continue;
+		nfs4_clear_state_manager_bit(clp);
+
+		if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
+			if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
+				nfs_client_return_marked_delegations(clp);
+				set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+			}
+			clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
 		}
 
-		nfs4_clear_state_manager_bit(clp);
 		/* Did we race with an attempt to give us more work? */
-		if (clp->cl_state == 0)
+		if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
 			return;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 			return;
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index de99db518571..f2129a5d9f23 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -266,9 +266,7 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
 		return;
 
 	if (nbh == NULL) {	/* blocksize == pagesize */
-		xa_lock_irq(&btnc->i_pages);
-		__xa_erase(&btnc->i_pages, newkey);
-		xa_unlock_irq(&btnc->i_pages);
+		xa_erase_irq(&btnc->i_pages, newkey);
 		unlock_page(ctxt->bh->b_page);
 	} else
 		brelse(nbh);
diff --git a/fs/read_write.c b/fs/read_write.c
index bfcb4ced5664..4dae0399c75a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2094,17 +2094,18 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	off = same->src_offset;
 	len = same->src_length;
 
-	ret = -EISDIR;
 	if (S_ISDIR(src->i_mode))
-		goto out;
+		return -EISDIR;
 
-	ret = -EINVAL;
 	if (!S_ISREG(src->i_mode))
-		goto out;
+		return -EINVAL;
+
+	if (!file->f_op->remap_file_range)
+		return -EOPNOTSUPP;
 
 	ret = remap_verify_area(file, off, len, false);
 	if (ret < 0)
-		goto out;
+		return ret;
 	ret = 0;
 
 	if (off + len > i_size_read(src))
@@ -2147,10 +2148,8 @@ next_fdput:
 		fdput(dst_fd);
 next_loop:
 		if (fatal_signal_pending(current))
-			goto out;
+			break;
 	}
-
-out:
 	return ret;
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 74d7228e755b..19e921d1586f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1694,10 +1694,13 @@ xfs_bmap_add_extent_delay_real(
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Filling in all of a previously delayed allocation extent.
-		 * The right neighbor is contiguous, the left is not.
+		 * The right neighbor is contiguous, the left is not. Take care
+		 * with delay -> unwritten extent allocation here because the
+		 * delalloc record we are overwriting is always written.
 		 */
 		PREV.br_startblock = new->br_startblock;
 		PREV.br_blockcount += RIGHT.br_blockcount;
+		PREV.br_state = new->br_state;
 
 		xfs_iext_next(ifp, &bma->icur);
 		xfs_iext_remove(bma->ip, &bma->icur, state);
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 86c50208a143..7fbf8af0b159 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -538,15 +538,18 @@ xfs_inobt_rec_check_count(
 
 static xfs_extlen_t
 xfs_inobt_max_size(
-	struct xfs_mount	*mp)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
 {
+	xfs_agblock_t		agblocks = xfs_ag_block_count(mp, agno);
+
 	/* Bail out if we're uninitialized, which can happen in mkfs. */
 	if (mp->m_inobt_mxr[0] == 0)
 		return 0;
 
 	return xfs_btree_calc_size(mp->m_inobt_mnr,
-			(uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
+			(uint64_t)agblocks * mp->m_sb.sb_inopblock /
 				XFS_INODES_PER_CHUNK);
 }
 
 static int
@@ -594,7 +597,7 @@ xfs_finobt_calc_reserves(
 	if (error)
 		return error;
 
-	*ask += xfs_inobt_max_size(mp);
+	*ask += xfs_inobt_max_size(mp, agno);
 	*used += tree_len;
 	return 0;
 }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5d263dfdb3bc..404e581f1ea1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1042,7 +1042,7 @@ out_trans_cancel:
 	goto out_unlock;
 }
 
-static int
+int
 xfs_flush_unmap_range(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
@@ -1195,13 +1195,7 @@ xfs_prepare_shift(
 	 * Writeback and invalidate cache for the remainder of the file as we're
 	 * about to shift down every extent from offset to EOF.
 	 */
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
-	if (error)
-		return error;
-	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					offset >> PAGE_SHIFT, -1);
-	if (error)
-		return error;
+	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
 
 	/*
 	 * Clean out anything hanging around in the cow fork now that
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 87363d136bb6..7a78229cf1a7 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -80,4 +80,7 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 			  int whichfork, xfs_extnum_t *nextents,
 			  xfs_filblks_t *count);
 
+int xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset,
+			  xfs_off_t len);
+
 #endif /* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 12d8455bfbb2..010db5f8fb00 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1233,9 +1233,23 @@ xfs_buf_iodone(
 }
 
 /*
- * Requeue a failed buffer for writeback
+ * Requeue a failed buffer for writeback.
  *
- * Return true if the buffer has been re-queued properly, false otherwise
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active reference counts on the buffer
+ * may be the failed log items. Hence if we clear the log item failed state
+ * before queuing the buffer for IO we can release all active references to
+ * the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
+ * order we process them in - the buffer is locked, and we own the buffer list
+ * so nothing on them is going to change while we are performing this action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ *
+ * Return true if the buffer was added to the buffer list, false if it was
+ * already on the buffer list.
 */
 bool
 xfs_buf_resubmit_failed_buffers(
@@ -1243,16 +1257,16 @@ xfs_buf_resubmit_failed_buffers(
 	struct list_head	*buffer_list)
 {
 	struct xfs_log_item	*lip;
+	bool			ret;
+
+	ret = xfs_buf_delwri_queue(bp, buffer_list);
 
 	/*
-	 * Clear XFS_LI_FAILED flag from all items before resubmit
-	 *
-	 * XFS_LI_FAILED set/clear is protected by ail_lock, caller this
+	 * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this
 	 * function already have it acquired
 	 */
 	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
 		xfs_clear_li_failed(lip);
 
-	/* Add this buffer back to the delayed write list */
-	return xfs_buf_delwri_queue(bp, buffer_list);
+	return ret;
 }
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 53c9ab8fb777..e47425071e65 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -920,7 +920,7 @@ out_unlock:
 }
 
 
-loff_t
+STATIC loff_t
 xfs_file_remap_range(
 	struct file		*file_in,
 	loff_t			pos_in,
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ecdb086bc23e..322a852ce284 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -296,6 +296,7 @@ xfs_reflink_reserve_cow(
 	if (error)
 		return error;
 
+	xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
 	trace_xfs_reflink_cow_alloc(ip, &got);
 	return 0;
 }
@@ -1351,10 +1352,19 @@ xfs_reflink_remap_prep(
 	if (ret)
 		goto out_unlock;
 
-	/* Zap any page cache for the destination file's range. */
-	truncate_inode_pages_range(&inode_out->i_data,
-			round_down(pos_out, PAGE_SIZE),
-			round_up(pos_out + *len, PAGE_SIZE) - 1);
+	/*
+	 * If pos_out > EOF, we may have dirtied blocks between EOF and
+	 * pos_out. In that case, we need to extend the flush and unmap to cover
+	 * from EOF to the end of the copy length.
+	 */
+	if (pos_out > XFS_ISIZE(dest)) {
+		loff_t	flen = *len + (pos_out - XFS_ISIZE(dest));
+		ret = xfs_flush_unmap_range(dest, XFS_ISIZE(dest), flen);
+	} else {
+		ret = xfs_flush_unmap_range(dest, pos_out, *len);
+	}
+	if (ret)
+		goto out_unlock;
 
 	return 1;
 out_unlock:
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 3043e5ed6495..8a6532aae779 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -280,7 +280,10 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 	),
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
+		if (bp->b_bn == XFS_BUF_DADDR_NULL)
+			__entry->bno = bp->b_maps[0].bm_bn;
+		else
+			__entry->bno = bp->b_bn;
 		__entry->nblks = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);