summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/Makefile2
-rw-r--r--fs/ceph/file.c35
-rw-r--r--fs/ceph/io.c163
-rw-r--r--fs/ceph/io.h12
-rw-r--r--fs/ceph/super.h4
5 files changed, 200 insertions, 16 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index a699e320393f..c1da294418d1 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -6,7 +6,7 @@
6obj-$(CONFIG_CEPH_FS) += ceph.o 6obj-$(CONFIG_CEPH_FS) += ceph.o
7 7
8ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ 8ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
9 export.o caps.o snap.o xattr.o quota.o \ 9 export.o caps.o snap.o xattr.o quota.o io.o \
10 mds_client.o mdsmap.o strings.o ceph_frag.o \ 10 mds_client.o mdsmap.o strings.o ceph_frag.o \
11 debugfs.o 11 debugfs.o
12 12
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5182e1a49d6f..ff17a81bf2e2 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -15,6 +15,7 @@
15#include "super.h" 15#include "super.h"
16#include "mds_client.h" 16#include "mds_client.h"
17#include "cache.h" 17#include "cache.h"
18#include "io.h"
18 19
19static __le32 ceph_flags_sys2wire(u32 flags) 20static __le32 ceph_flags_sys2wire(u32 flags)
20{ 21{
@@ -930,7 +931,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
930 struct ceph_aio_request *aio_req = NULL; 931 struct ceph_aio_request *aio_req = NULL;
931 int num_pages = 0; 932 int num_pages = 0;
932 int flags; 933 int flags;
933 int ret; 934 int ret = 0;
934 struct timespec64 mtime = current_time(inode); 935 struct timespec64 mtime = current_time(inode);
935 size_t count = iov_iter_count(iter); 936 size_t count = iov_iter_count(iter);
936 loff_t pos = iocb->ki_pos; 937 loff_t pos = iocb->ki_pos;
@@ -944,11 +945,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
944 (write ? "write" : "read"), file, pos, (unsigned)count, 945 (write ? "write" : "read"), file, pos, (unsigned)count,
945 snapc, snapc ? snapc->seq : 0); 946 snapc, snapc ? snapc->seq : 0);
946 947
947 ret = filemap_write_and_wait_range(inode->i_mapping,
948 pos, pos + count - 1);
949 if (ret < 0)
950 return ret;
951
952 if (write) { 948 if (write) {
953 int ret2 = invalidate_inode_pages2_range(inode->i_mapping, 949 int ret2 = invalidate_inode_pages2_range(inode->i_mapping,
954 pos >> PAGE_SHIFT, 950 pos >> PAGE_SHIFT,
@@ -1284,12 +1280,16 @@ again:
1284 1280
1285 if (ci->i_inline_version == CEPH_INLINE_NONE) { 1281 if (ci->i_inline_version == CEPH_INLINE_NONE) {
1286 if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) { 1282 if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
1283 ceph_start_io_direct(inode);
1287 ret = ceph_direct_read_write(iocb, to, 1284 ret = ceph_direct_read_write(iocb, to,
1288 NULL, NULL); 1285 NULL, NULL);
1286 ceph_end_io_direct(inode);
1289 if (ret >= 0 && ret < len) 1287 if (ret >= 0 && ret < len)
1290 retry_op = CHECK_EOF; 1288 retry_op = CHECK_EOF;
1291 } else { 1289 } else {
1290 ceph_start_io_read(inode);
1292 ret = ceph_sync_read(iocb, to, &retry_op); 1291 ret = ceph_sync_read(iocb, to, &retry_op);
1292 ceph_end_io_read(inode);
1293 } 1293 }
1294 } else { 1294 } else {
1295 retry_op = READ_INLINE; 1295 retry_op = READ_INLINE;
@@ -1300,7 +1300,9 @@ again:
1300 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 1300 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
1301 ceph_cap_string(got)); 1301 ceph_cap_string(got));
1302 ceph_add_rw_context(fi, &rw_ctx); 1302 ceph_add_rw_context(fi, &rw_ctx);
1303 ceph_start_io_read(inode);
1303 ret = generic_file_read_iter(iocb, to); 1304 ret = generic_file_read_iter(iocb, to);
1305 ceph_end_io_read(inode);
1304 ceph_del_rw_context(fi, &rw_ctx); 1306 ceph_del_rw_context(fi, &rw_ctx);
1305 } 1307 }
1306 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 1308 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
@@ -1409,7 +1411,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
1409 return -ENOMEM; 1411 return -ENOMEM;
1410 1412
1411retry_snap: 1413retry_snap:
1412 inode_lock(inode); 1414 if (iocb->ki_flags & IOCB_DIRECT)
1415 ceph_start_io_direct(inode);
1416 else
1417 ceph_start_io_write(inode);
1413 1418
1414 /* We can write back this queue in page reclaim */ 1419 /* We can write back this queue in page reclaim */
1415 current->backing_dev_info = inode_to_bdi(inode); 1420 current->backing_dev_info = inode_to_bdi(inode);
@@ -1480,7 +1485,6 @@ retry_snap:
1480 (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) { 1485 (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
1481 struct ceph_snap_context *snapc; 1486 struct ceph_snap_context *snapc;
1482 struct iov_iter data; 1487 struct iov_iter data;
1483 inode_unlock(inode);
1484 1488
1485 spin_lock(&ci->i_ceph_lock); 1489 spin_lock(&ci->i_ceph_lock);
1486 if (__ceph_have_pending_cap_snap(ci)) { 1490 if (__ceph_have_pending_cap_snap(ci)) {
@@ -1497,11 +1501,14 @@ retry_snap:
1497 1501
1498 /* we might need to revert back to that point */ 1502 /* we might need to revert back to that point */
1499 data = *from; 1503 data = *from;
1500 if (iocb->ki_flags & IOCB_DIRECT) 1504 if (iocb->ki_flags & IOCB_DIRECT) {
1501 written = ceph_direct_read_write(iocb, &data, snapc, 1505 written = ceph_direct_read_write(iocb, &data, snapc,
1502 &prealloc_cf); 1506 &prealloc_cf);
1503 else 1507 ceph_end_io_direct(inode);
1508 } else {
1504 written = ceph_sync_write(iocb, &data, pos, snapc); 1509 written = ceph_sync_write(iocb, &data, pos, snapc);
1510 ceph_end_io_write(inode);
1511 }
1505 if (written > 0) 1512 if (written > 0)
1506 iov_iter_advance(from, written); 1513 iov_iter_advance(from, written);
1507 ceph_put_snap_context(snapc); 1514 ceph_put_snap_context(snapc);
@@ -1516,7 +1523,7 @@ retry_snap:
1516 written = generic_perform_write(file, from, pos); 1523 written = generic_perform_write(file, from, pos);
1517 if (likely(written >= 0)) 1524 if (likely(written >= 0))
1518 iocb->ki_pos = pos + written; 1525 iocb->ki_pos = pos + written;
1519 inode_unlock(inode); 1526 ceph_end_io_write(inode);
1520 } 1527 }
1521 1528
1522 if (written >= 0) { 1529 if (written >= 0) {
@@ -1551,9 +1558,11 @@ retry_snap:
1551 } 1558 }
1552 1559
1553 goto out_unlocked; 1560 goto out_unlocked;
1554
1555out: 1561out:
1556 inode_unlock(inode); 1562 if (iocb->ki_flags & IOCB_DIRECT)
1563 ceph_end_io_direct(inode);
1564 else
1565 ceph_end_io_write(inode);
1557out_unlocked: 1566out_unlocked:
1558 ceph_free_cap_flush(prealloc_cf); 1567 ceph_free_cap_flush(prealloc_cf);
1559 current->backing_dev_info = NULL; 1568 current->backing_dev_info = NULL;
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
new file mode 100644
index 000000000000..97602ea92ff4
--- /dev/null
+++ b/fs/ceph/io.c
@@ -0,0 +1,163 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2016 Trond Myklebust
4 * Copyright (c) 2019 Jeff Layton
5 *
6 * I/O and data path helper functionality.
7 *
8 * Heavily borrowed from equivalent code in fs/nfs/io.c
9 */
10
11#include <linux/ceph/ceph_debug.h>
12
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/rwsem.h>
16#include <linux/fs.h>
17
18#include "super.h"
19#include "io.h"
20
21/* Clear CEPH_I_ODIRECT if it is set, then call inode_dio_wait(). Call with exclusively locked inode->i_rwsem */
22static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
23{
24 lockdep_assert_held_write(&inode->i_rwsem);
25
26 if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
27 spin_lock(&ci->i_ceph_lock);
28 ci->i_ceph_flags &= ~CEPH_I_ODIRECT;
29 spin_unlock(&ci->i_ceph_lock);
30 inode_dio_wait(inode);
31 }
32}
33
34/**
35 * ceph_start_io_read - declare the file is being used for buffered reads
36 * @inode: file inode
37 *
38 * Declare that a buffered read operation is about to start, and ensure
39 * that we block all direct I/O.
40 * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
41 * and holds a shared lock on inode->i_rwsem to ensure that the flag
42 * cannot be changed.
43 * In practice, this means that buffered read operations are allowed to
44 * execute in parallel, thanks to the shared lock, whereas direct I/O
45 * operations need to wait to grab an exclusive lock in order to set
46 * CEPH_I_ODIRECT.
47 * Note that buffered writes and truncates both take a write lock on
48 * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
49 */
50void
51ceph_start_io_read(struct inode *inode)
52{
53 struct ceph_inode_info *ci = ceph_inode(inode);
54
55 /* Be an optimist! Take the shared lock first; if CEPH_I_ODIRECT is already clear we are done. */
56 down_read(&inode->i_rwsem);
57 if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
58 return;
59 up_read(&inode->i_rwsem);
60 /* Slow path: retake the lock exclusively, clear the flag, then downgrade to a shared lock. */
61 down_write(&inode->i_rwsem);
62 ceph_block_o_direct(ci, inode);
63 downgrade_write(&inode->i_rwsem);
64}
65
66/**
67 * ceph_end_io_read - declare that the buffered read operation is done
68 * @inode: file inode
69 *
70 * Declare that a buffered read operation is done, and release the shared
71 * lock on inode->i_rwsem that ceph_start_io_read() returned with.
72 */
73void
74ceph_end_io_read(struct inode *inode)
75{
76 up_read(&inode->i_rwsem);
77}
78
79/**
80 * ceph_start_io_write - declare the file is being used for buffered writes
81 * @inode: file inode
82 *
83 * Declare that a buffered write operation is about to start, and ensure
84 * that we block all direct I/O. On exit, inode->i_rwsem is held exclusively and CEPH_I_ODIRECT is unset.
85 */
86void
87ceph_start_io_write(struct inode *inode)
88{
89 down_write(&inode->i_rwsem);
90 ceph_block_o_direct(ceph_inode(inode), inode);
91}
92
93/**
94 * ceph_end_io_write - declare that the buffered write operation is done
95 * @inode: file inode
96 *
97 * Declare that a buffered write operation is done, and release the
98 * exclusive lock on inode->i_rwsem.
99 */
100void
101ceph_end_io_write(struct inode *inode)
102{
103 up_write(&inode->i_rwsem);
104}
105
106/* Set CEPH_I_ODIRECT if it is clear, then call filemap_write_and_wait() on the inode's mapping. Call with exclusively locked inode->i_rwsem */
107static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
108{
109 lockdep_assert_held_write(&inode->i_rwsem);
110
111 if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
112 spin_lock(&ci->i_ceph_lock);
113 ci->i_ceph_flags |= CEPH_I_ODIRECT;
114 spin_unlock(&ci->i_ceph_lock);
115 /* FIXME: unmap_mapping_range? */
116 filemap_write_and_wait(inode->i_mapping);
117 }
118}
119
120/**
121 * ceph_start_io_direct - declare the file is being used for direct i/o
122 * @inode: file inode
123 *
124 * Declare that a direct I/O operation is about to start, and ensure
125 * that we block all buffered I/O.
126 * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
127 * and holds a shared lock on inode->i_rwsem to ensure that the flag
128 * cannot be changed.
129 * In practice, this means that direct I/O operations are allowed to
130 * execute in parallel, thanks to the shared lock, whereas buffered I/O
131 * operations need to wait to grab an exclusive lock in order to clear
132 * CEPH_I_ODIRECT.
133 * Note that buffered writes and truncates both take a write lock on
134 * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
135 */
136void
137ceph_start_io_direct(struct inode *inode)
138{
139 struct ceph_inode_info *ci = ceph_inode(inode);
140
141 /* Be an optimist! Take the shared lock first; if CEPH_I_ODIRECT is already set we are done. */
142 down_read(&inode->i_rwsem);
143 if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
144 return;
145 up_read(&inode->i_rwsem);
146 /* Slow path: retake the lock exclusively, set the flag, then downgrade to a shared lock. */
147 down_write(&inode->i_rwsem);
148 ceph_block_buffered(ci, inode);
149 downgrade_write(&inode->i_rwsem);
150}
151
152/**
153 * ceph_end_io_direct - declare that the direct i/o operation is done
154 * @inode: file inode
155 *
156 * Declare that a direct I/O operation is done, and release the shared
157 * lock on inode->i_rwsem that ceph_start_io_direct() returned with.
158 */
159void
160ceph_end_io_direct(struct inode *inode)
161{
162 up_read(&inode->i_rwsem);
163}
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
new file mode 100644
index 000000000000..fa594cd77348
--- /dev/null
+++ b/fs/ceph/io.h
@@ -0,0 +1,12 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _FS_CEPH_IO_H
3#define _FS_CEPH_IO_H
4
5void ceph_start_io_read(struct inode *inode);
6void ceph_end_io_read(struct inode *inode);
7void ceph_start_io_write(struct inode *inode);
8void ceph_end_io_write(struct inode *inode);
9void ceph_start_io_direct(struct inode *inode);
10void ceph_end_io_direct(struct inode *inode);
11
12#endif /* _FS_CEPH_IO_H */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 03e4828c7635..98d7190289c8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -513,10 +513,10 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
513#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ 513#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */
514#define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */ 514#define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */
515#define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */ 515#define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */
516#define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snaps */ 516#define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snaps */
517#define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */ 517#define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */
518#define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */ 518#define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */
519 519#define CEPH_I_ODIRECT (1 << 12) /* inode in direct I/O mode */
520 520
521/* 521/*
522 * Masks of ceph inode work. 522 * Masks of ceph inode work.