summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorChris Mason <clm@fb.com>2016-05-16 12:21:01 -0400
committerChris Mason <clm@fb.com>2016-05-26 16:23:59 -0400
commit56244ef151c3cd11f505020ab0b3f45454363bcc (patch)
tree4ee1dea828e6be4e0dd746dfe19d40a0790e303d /fs/btrfs/file.c
parent9257b4ca9dff074918164e4fa10e9001c62bc27a (diff)
Btrfs: fix handling of faults from btrfs_copy_from_user
When btrfs_copy_from_user isn't able to copy all of the pages, we need to adjust our accounting to reflect the work that was actually done. Commit 2e78c927d79 changed around the decisions a little and we ended up skipping the accounting adjustments some of the time. This commit makes sure that when we don't copy anything at all, we still hop into the adjustments, and switches to release_bytes instead of write_bytes, since write_bytes isn't aligned. The accounting errors led to warnings during btrfs_destroy_inode: [ 70.847532] WARNING: CPU: 10 PID: 514 at fs/btrfs/inode.c:9350 btrfs_destroy_inode+0x2b3/0x2c0 [ 70.847536] Modules linked in: i2c_piix4 virtio_net i2c_core input_leds button led_class serio_raw acpi_cpufreq sch_fq_codel autofs4 virtio_blk [ 70.847538] CPU: 10 PID: 514 Comm: umount Tainted: G W 4.6.0-rc6_00062_g2997da1-dirty #23 [ 70.847539] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014 [ 70.847542] 0000000000000000 ffff880ff5cafab8 ffffffff8149d5e9 0000000000000202 [ 70.847543] 0000000000000000 0000000000000000 0000000000000000 ffff880ff5cafb08 [ 70.847547] ffffffff8107bdfd ffff880ff5cafaf8 000024868120013d ffff880ff5cafb28 [ 70.847547] Call Trace: [ 70.847550] [<ffffffff8149d5e9>] dump_stack+0x51/0x78 [ 70.847551] [<ffffffff8107bdfd>] __warn+0xfd/0x120 [ 70.847553] [<ffffffff8107be3d>] warn_slowpath_null+0x1d/0x20 [ 70.847555] [<ffffffff8139c9e3>] btrfs_destroy_inode+0x2b3/0x2c0 [ 70.847556] [<ffffffff812003a1>] ? __destroy_inode+0x71/0x140 [ 70.847558] [<ffffffff812004b3>] destroy_inode+0x43/0x70 [ 70.847559] [<ffffffff810b7b5f>] ? wake_up_bit+0x2f/0x40 [ 70.847560] [<ffffffff81200c68>] evict+0x148/0x1d0 [ 70.847562] [<ffffffff81398ade>] ? start_transaction+0x3de/0x460 [ 70.847564] [<ffffffff81200d49>] dispose_list+0x59/0x80 [ 70.847565] [<ffffffff81201ba0>] evict_inodes+0x180/0x190 [ 70.847566] [<ffffffff812191ff>] ? __sync_filesystem+0x3f/0x50 [ 70.847568] [<ffffffff811e95f8>] generic_shutdown_super+0x48/0x100 [ 70.847569] [<ffffffff810b75c0>] ? woken_wake_function+0x20/0x20 [ 70.847571] [<ffffffff811e9796>] kill_anon_super+0x16/0x30 [ 70.847573] [<ffffffff81365cde>] btrfs_kill_super+0x1e/0x130 [ 70.847574] [<ffffffff811e99be>] deactivate_locked_super+0x4e/0x90 [ 70.847576] [<ffffffff811e9e61>] deactivate_super+0x51/0x70 [ 70.847577] [<ffffffff8120536f>] cleanup_mnt+0x3f/0x80 [ 70.847579] [<ffffffff81205402>] __cleanup_mnt+0x12/0x20 [ 70.847581] [<ffffffff81098358>] task_work_run+0x68/0xa0 [ 70.847582] [<ffffffff810022b6>] exit_to_usermode_loop+0xd6/0xe0 [ 70.847583] [<ffffffff81002e1d>] do_syscall_64+0xbd/0x170 [ 70.847586] [<ffffffff817d4dbc>] entry_SYSCALL64_slow_path+0x25/0x25 This is the test program I used to force short returns from btrfs_copy_from_user void *dontneed(void *arg) { char *p = arg; int ret; while(1) { ret = madvise(p, BUFSIZE/4, MADV_DONTNEED); if (ret) { perror("madvise"); exit(1); } } } int main(int ac, char **av) { int ret; int fd; char *filename; unsigned long offset; char *buf; int i; pthread_t tid; if (ac != 2) { fprintf(stderr, "usage: dammitdave filename\n"); exit(1); } buf = mmap(NULL, BUFSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { perror("mmap"); exit(1); } memset(buf, 'a', BUFSIZE); filename = av[1]; ret = pthread_create(&tid, NULL, dontneed, buf); if (ret) { fprintf(stderr, "error %d from pthread_create\n", ret); exit(1); } ret = pthread_detach(tid); if (ret) { fprintf(stderr, "pthread detach failed %d\n", ret); exit(1); } while (1) { fd = open(filename, O_RDWR | O_CREAT, 0600); if (fd < 0) { perror("open"); exit(1); } for (i = 0; i < ROUNDS; i++) { int this_write = BUFSIZE; offset = rand() % MAXSIZE; ret = pwrite(fd, buf, this_write, offset); if (ret < 0) { perror("pwrite"); exit(1); } else if (ret != this_write) { fprintf(stderr, "short write to %s offset %lu ret %d\n", filename, offset, ret); exit(1); } if (i == ROUNDS - 1) { ret = sync_file_range(fd, offset, 4096, SYNC_FILE_RANGE_WRITE); if (ret < 0) { perror("sync_file_range"); exit(1); } } } ret = ftruncate(fd, 0); if (ret < 0) { perror("ftruncate"); exit(1); } ret = close(fd); if (ret) { perror("close"); exit(1); } ret = unlink(filename); if (ret) { perror("unlink"); exit(1); } } return 0; } Signed-off-by: Chris Mason <clm@fb.com> Reported-by: Dave Jones <dsj@fb.com> Fixes: 2e78c927d79333f299a8ac81c2fd2952caeef335 cc: stable@vger.kernel.org # v4.6 Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c27
1 files changed, 17 insertions, 10 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1933e8b07ed7..159a93450e26 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1596,6 +1596,13 @@ again:
1596 1596
1597 copied = btrfs_copy_from_user(pos, write_bytes, pages, i); 1597 copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
1598 1598
1599 num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1600 reserve_bytes);
1601 dirty_sectors = round_up(copied + sector_offset,
1602 root->sectorsize);
1603 dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1604 dirty_sectors);
1605
1599 /* 1606 /*
1600 * if we have trouble faulting in the pages, fall 1607 * if we have trouble faulting in the pages, fall
1601 * back to one page at a time 1608 * back to one page at a time
@@ -1605,6 +1612,7 @@ again:
1605 1612
1606 if (copied == 0) { 1613 if (copied == 0) {
1607 force_page_uptodate = true; 1614 force_page_uptodate = true;
1615 dirty_sectors = 0;
1608 dirty_pages = 0; 1616 dirty_pages = 0;
1609 } else { 1617 } else {
1610 force_page_uptodate = false; 1618 force_page_uptodate = false;
@@ -1615,20 +1623,19 @@ again:
1615 /* 1623 /*
1616 * If we had a short copy we need to release the excess delaloc 1624 * If we had a short copy we need to release the excess delaloc
1617 * bytes we reserved. We need to increment outstanding_extents 1625 * bytes we reserved. We need to increment outstanding_extents
1618 * because btrfs_delalloc_release_space will decrement it, but 1626 * because btrfs_delalloc_release_space and
1627 * btrfs_delalloc_release_metadata will decrement it, but
1619 * we still have an outstanding extent for the chunk we actually 1628 * we still have an outstanding extent for the chunk we actually
1620 * managed to copy. 1629 * managed to copy.
1621 */ 1630 */
1622 num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1623 reserve_bytes);
1624 dirty_sectors = round_up(copied + sector_offset,
1625 root->sectorsize);
1626 dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1627 dirty_sectors);
1628
1629 if (num_sectors > dirty_sectors) { 1631 if (num_sectors > dirty_sectors) {
1630 release_bytes = (write_bytes - copied) 1632 /*
1631 & ~((u64)root->sectorsize - 1); 1633 * we round down because we don't want to count
1634 * any partial blocks actually sent through the
1635 * IO machines
1636 */
1637 release_bytes = round_down(release_bytes - copied,
1638 root->sectorsize);
1632 if (copied > 0) { 1639 if (copied > 0) {
1633 spin_lock(&BTRFS_I(inode)->lock); 1640 spin_lock(&BTRFS_I(inode)->lock);
1634 BTRFS_I(inode)->outstanding_extents++; 1641 BTRFS_I(inode)->outstanding_extents++;