diff options
author | Dave Kleikamp <dave.kleikamp@oracle.com> | 2018-11-27 14:31:30 -0500 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@hammerspace.com> | 2018-12-02 09:43:56 -0500 |
commit | ad3cba223ac02dc769c3bbe88efe277bbb457566 (patch) | |
tree | e0b84128743530bb33f2bbd640cbaf7b5b185e88 /fs/nfs/direct.c | |
parent | 320f35b7bf8cccf1997ca3126843535e1b95e9c4 (diff) |
nfs: don't dirty kernel pages read by direct-io
When we use direct_IO with an NFS backing store, we can trigger a
WARNING in __set_page_dirty(), as below, since we're dirtying the page
unnecessarily in nfs_direct_read_completion().
To fix, replicate the logic in commit 53cbf3b157a0 ("fs: direct-io:
don't dirtying pages for ITER_BVEC/ITER_KVEC direct read").
Other filesystems that implement direct_IO handle this; most use
blockdev_direct_IO(). ceph and cifs have similar logic.
Steps to reproduce (the WARNING below is then logged by the kernel):
    mount 127.0.0.1:/export /nfs
    dd if=/dev/zero of=/nfs/image bs=1M count=200
    losetup --direct-io=on -f /nfs/image
    mkfs.btrfs /dev/loop0
    mount -t btrfs /dev/loop0 /mnt/
kernel: WARNING: CPU: 0 PID: 8067 at fs/buffer.c:580 __set_page_dirty+0xaf/0xd0
kernel: Modules linked in: loop(E) nfsv3(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) fuse(E) tun(E) ip6t_rpfilter(E) ipt_REJECT(E) nf_
kernel: snd_seq(E) snd_seq_device(E) snd_pcm(E) video(E) snd_timer(E) snd(E) soundcore(E) ip_tables(E) xfs(E) libcrc32c(E) sd_mod(E) sr_mod(E) cdrom(E) ata_generic(E) pata_acpi(E) crc32c_intel(E) ahci(E) li
kernel: CPU: 0 PID: 8067 Comm: kworker/0:2 Tainted: G E 4.20.0-rc1.master.20181111.ol7.x86_64 #1
kernel: Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
kernel: Workqueue: nfsiod rpc_async_release [sunrpc]
kernel: RIP: 0010:__set_page_dirty+0xaf/0xd0
kernel: Code: c3 48 8b 02 f6 c4 04 74 d4 48 89 df e8 ba 05 f7 ff 48 89 c6 eb cb 48 8b 43 08 a8 01 75 1f 48 89 d8 48 8b 00 a8 04 74 02 eb 87 <0f> 0b eb 83 48 83 e8 01 eb 9f 48 83 ea 01 0f 1f 00 eb 8b 48 83 e8
kernel: RSP: 0000:ffffc1c8825b7d78 EFLAGS: 00013046
kernel: RAX: 000fffffc0020089 RBX: fffff2b603308b80 RCX: 0000000000000001
kernel: RDX: 0000000000000001 RSI: ffff9d11478115c8 RDI: ffff9d11478115d0
kernel: RBP: ffffc1c8825b7da0 R08: 0000646f6973666e R09: 8080808080808080
kernel: R10: 0000000000000001 R11: 0000000000000000 R12: ffff9d11478115d0
kernel: R13: ffff9d11478115c8 R14: 0000000000003246 R15: 0000000000000001
kernel: FS: 0000000000000000(0000) GS:ffff9d115ba00000(0000) knlGS:0000000000000000
kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 00007f408686f640 CR3: 0000000104d8e004 CR4: 00000000000606f0
kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
kernel: Call Trace:
kernel: __set_page_dirty_buffers+0xb6/0x110
kernel: set_page_dirty+0x52/0xb0
kernel: nfs_direct_read_completion+0xc4/0x120 [nfs]
kernel: nfs_pgio_release+0x10/0x20 [nfs]
kernel: rpc_free_task+0x30/0x70 [sunrpc]
kernel: rpc_async_release+0x12/0x20 [sunrpc]
kernel: process_one_work+0x174/0x390
kernel: worker_thread+0x4f/0x3e0
kernel: kthread+0x102/0x140
kernel: ? drain_workqueue+0x130/0x130
kernel: ? kthread_stop+0x110/0x110
kernel: ret_from_fork+0x35/0x40
kernel: ---[ end trace 01341980905412c9 ]---
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
[forward-ported to v4.20]
Signed-off-by: Calum Mackay <calum.mackay@oracle.com>
Reviewed-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r-- | fs/nfs/direct.c | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aa12c3063bae..33824a0a57bf 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -98,8 +98,11 @@ struct nfs_direct_req { | |||
98 | struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ | 98 | struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ |
99 | struct work_struct work; | 99 | struct work_struct work; |
100 | int flags; | 100 | int flags; |
101 | /* for write */ | ||
101 | #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ | 102 | #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ |
102 | #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ | 103 | #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ |
104 | /* for read */ | ||
105 | #define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ | ||
103 | struct nfs_writeverf verf; /* unstable write verifier */ | 106 | struct nfs_writeverf verf; /* unstable write verifier */ |
104 | }; | 107 | }; |
105 | 108 | ||
@@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) | |||
412 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); | 415 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); |
413 | struct page *page = req->wb_page; | 416 | struct page *page = req->wb_page; |
414 | 417 | ||
415 | if (!PageCompound(page) && bytes < hdr->good_bytes) | 418 | if (!PageCompound(page) && bytes < hdr->good_bytes && |
419 | (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) | ||
416 | set_page_dirty(page); | 420 | set_page_dirty(page); |
417 | bytes += req->wb_bytes; | 421 | bytes += req->wb_bytes; |
418 | nfs_list_remove_request(req); | 422 | nfs_list_remove_request(req); |
@@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) | |||
587 | if (!is_sync_kiocb(iocb)) | 591 | if (!is_sync_kiocb(iocb)) |
588 | dreq->iocb = iocb; | 592 | dreq->iocb = iocb; |
589 | 593 | ||
594 | if (iter_is_iovec(iter)) | ||
595 | dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; | ||
596 | |||
590 | nfs_start_io_direct(inode); | 597 | nfs_start_io_direct(inode); |
591 | 598 | ||
592 | NFS_I(inode)->read_io += count; | 599 | NFS_I(inode)->read_io += count; |