diff options
-rw-r--r-- | fs/ceph/Makefile | 2 | ||||
-rw-r--r-- | fs/ceph/file.c | 35 | ||||
-rw-r--r-- | fs/ceph/io.c | 163 | ||||
-rw-r--r-- | fs/ceph/io.h | 12 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 |
5 files changed, 200 insertions, 16 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index a699e320393f..c1da294418d1 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -6,7 +6,7 @@ | |||
6 | obj-$(CONFIG_CEPH_FS) += ceph.o | 6 | obj-$(CONFIG_CEPH_FS) += ceph.o |
7 | 7 | ||
8 | ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 8 | ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
9 | export.o caps.o snap.o xattr.o quota.o \ | 9 | export.o caps.o snap.o xattr.o quota.o io.o \ |
10 | mds_client.o mdsmap.o strings.o ceph_frag.o \ | 10 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
11 | debugfs.o | 11 | debugfs.o |
12 | 12 | ||
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5182e1a49d6f..ff17a81bf2e2 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include "super.h" | 15 | #include "super.h" |
16 | #include "mds_client.h" | 16 | #include "mds_client.h" |
17 | #include "cache.h" | 17 | #include "cache.h" |
18 | #include "io.h" | ||
18 | 19 | ||
19 | static __le32 ceph_flags_sys2wire(u32 flags) | 20 | static __le32 ceph_flags_sys2wire(u32 flags) |
20 | { | 21 | { |
@@ -930,7 +931,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, | |||
930 | struct ceph_aio_request *aio_req = NULL; | 931 | struct ceph_aio_request *aio_req = NULL; |
931 | int num_pages = 0; | 932 | int num_pages = 0; |
932 | int flags; | 933 | int flags; |
933 | int ret; | 934 | int ret = 0; |
934 | struct timespec64 mtime = current_time(inode); | 935 | struct timespec64 mtime = current_time(inode); |
935 | size_t count = iov_iter_count(iter); | 936 | size_t count = iov_iter_count(iter); |
936 | loff_t pos = iocb->ki_pos; | 937 | loff_t pos = iocb->ki_pos; |
@@ -944,11 +945,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, | |||
944 | (write ? "write" : "read"), file, pos, (unsigned)count, | 945 | (write ? "write" : "read"), file, pos, (unsigned)count, |
945 | snapc, snapc ? snapc->seq : 0); | 946 | snapc, snapc ? snapc->seq : 0); |
946 | 947 | ||
947 | ret = filemap_write_and_wait_range(inode->i_mapping, | ||
948 | pos, pos + count - 1); | ||
949 | if (ret < 0) | ||
950 | return ret; | ||
951 | |||
952 | if (write) { | 948 | if (write) { |
953 | int ret2 = invalidate_inode_pages2_range(inode->i_mapping, | 949 | int ret2 = invalidate_inode_pages2_range(inode->i_mapping, |
954 | pos >> PAGE_SHIFT, | 950 | pos >> PAGE_SHIFT, |
@@ -1284,12 +1280,16 @@ again: | |||
1284 | 1280 | ||
1285 | if (ci->i_inline_version == CEPH_INLINE_NONE) { | 1281 | if (ci->i_inline_version == CEPH_INLINE_NONE) { |
1286 | if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) { | 1282 | if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) { |
1283 | ceph_start_io_direct(inode); | ||
1287 | ret = ceph_direct_read_write(iocb, to, | 1284 | ret = ceph_direct_read_write(iocb, to, |
1288 | NULL, NULL); | 1285 | NULL, NULL); |
1286 | ceph_end_io_direct(inode); | ||
1289 | if (ret >= 0 && ret < len) | 1287 | if (ret >= 0 && ret < len) |
1290 | retry_op = CHECK_EOF; | 1288 | retry_op = CHECK_EOF; |
1291 | } else { | 1289 | } else { |
1290 | ceph_start_io_read(inode); | ||
1292 | ret = ceph_sync_read(iocb, to, &retry_op); | 1291 | ret = ceph_sync_read(iocb, to, &retry_op); |
1292 | ceph_end_io_read(inode); | ||
1293 | } | 1293 | } |
1294 | } else { | 1294 | } else { |
1295 | retry_op = READ_INLINE; | 1295 | retry_op = READ_INLINE; |
@@ -1300,7 +1300,9 @@ again: | |||
1300 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, | 1300 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
1301 | ceph_cap_string(got)); | 1301 | ceph_cap_string(got)); |
1302 | ceph_add_rw_context(fi, &rw_ctx); | 1302 | ceph_add_rw_context(fi, &rw_ctx); |
1303 | ceph_start_io_read(inode); | ||
1303 | ret = generic_file_read_iter(iocb, to); | 1304 | ret = generic_file_read_iter(iocb, to); |
1305 | ceph_end_io_read(inode); | ||
1304 | ceph_del_rw_context(fi, &rw_ctx); | 1306 | ceph_del_rw_context(fi, &rw_ctx); |
1305 | } | 1307 | } |
1306 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", | 1308 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", |
@@ -1409,7 +1411,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1409 | return -ENOMEM; | 1411 | return -ENOMEM; |
1410 | 1412 | ||
1411 | retry_snap: | 1413 | retry_snap: |
1412 | inode_lock(inode); | 1414 | if (iocb->ki_flags & IOCB_DIRECT) |
1415 | ceph_start_io_direct(inode); | ||
1416 | else | ||
1417 | ceph_start_io_write(inode); | ||
1413 | 1418 | ||
1414 | /* We can write back this queue in page reclaim */ | 1419 | /* We can write back this queue in page reclaim */ |
1415 | current->backing_dev_info = inode_to_bdi(inode); | 1420 | current->backing_dev_info = inode_to_bdi(inode); |
@@ -1480,7 +1485,6 @@ retry_snap: | |||
1480 | (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) { | 1485 | (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) { |
1481 | struct ceph_snap_context *snapc; | 1486 | struct ceph_snap_context *snapc; |
1482 | struct iov_iter data; | 1487 | struct iov_iter data; |
1483 | inode_unlock(inode); | ||
1484 | 1488 | ||
1485 | spin_lock(&ci->i_ceph_lock); | 1489 | spin_lock(&ci->i_ceph_lock); |
1486 | if (__ceph_have_pending_cap_snap(ci)) { | 1490 | if (__ceph_have_pending_cap_snap(ci)) { |
@@ -1497,11 +1501,14 @@ retry_snap: | |||
1497 | 1501 | ||
1498 | /* we might need to revert back to that point */ | 1502 | /* we might need to revert back to that point */ |
1499 | data = *from; | 1503 | data = *from; |
1500 | if (iocb->ki_flags & IOCB_DIRECT) | 1504 | if (iocb->ki_flags & IOCB_DIRECT) { |
1501 | written = ceph_direct_read_write(iocb, &data, snapc, | 1505 | written = ceph_direct_read_write(iocb, &data, snapc, |
1502 | &prealloc_cf); | 1506 | &prealloc_cf); |
1503 | else | 1507 | ceph_end_io_direct(inode); |
1508 | } else { | ||
1504 | written = ceph_sync_write(iocb, &data, pos, snapc); | 1509 | written = ceph_sync_write(iocb, &data, pos, snapc); |
1510 | ceph_end_io_write(inode); | ||
1511 | } | ||
1505 | if (written > 0) | 1512 | if (written > 0) |
1506 | iov_iter_advance(from, written); | 1513 | iov_iter_advance(from, written); |
1507 | ceph_put_snap_context(snapc); | 1514 | ceph_put_snap_context(snapc); |
@@ -1516,7 +1523,7 @@ retry_snap: | |||
1516 | written = generic_perform_write(file, from, pos); | 1523 | written = generic_perform_write(file, from, pos); |
1517 | if (likely(written >= 0)) | 1524 | if (likely(written >= 0)) |
1518 | iocb->ki_pos = pos + written; | 1525 | iocb->ki_pos = pos + written; |
1519 | inode_unlock(inode); | 1526 | ceph_end_io_write(inode); |
1520 | } | 1527 | } |
1521 | 1528 | ||
1522 | if (written >= 0) { | 1529 | if (written >= 0) { |
@@ -1551,9 +1558,11 @@ retry_snap: | |||
1551 | } | 1558 | } |
1552 | 1559 | ||
1553 | goto out_unlocked; | 1560 | goto out_unlocked; |
1554 | |||
1555 | out: | 1561 | out: |
1556 | inode_unlock(inode); | 1562 | if (iocb->ki_flags & IOCB_DIRECT) |
1563 | ceph_end_io_direct(inode); | ||
1564 | else | ||
1565 | ceph_end_io_write(inode); | ||
1557 | out_unlocked: | 1566 | out_unlocked: |
1558 | ceph_free_cap_flush(prealloc_cf); | 1567 | ceph_free_cap_flush(prealloc_cf); |
1559 | current->backing_dev_info = NULL; | 1568 | current->backing_dev_info = NULL; |
diff --git a/fs/ceph/io.c b/fs/ceph/io.c new file mode 100644 index 000000000000..97602ea92ff4 --- /dev/null +++ b/fs/ceph/io.c | |||
@@ -0,0 +1,163 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Copyright (c) 2016 Trond Myklebust | ||
4 | * Copyright (c) 2019 Jeff Layton | ||
5 | * | ||
6 | * I/O and data path helper functionality. | ||
7 | * | ||
8 | * Heavily borrowed from equivalent code in fs/nfs/io.c | ||
9 | */ | ||
10 | |||
11 | #include <linux/ceph/ceph_debug.h> | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/rwsem.h> | ||
16 | #include <linux/fs.h> | ||
17 | |||
18 | #include "super.h" | ||
19 | #include "io.h" | ||
20 | |||
21 | /* Call with exclusively locked inode->i_rwsem */ | ||
22 | static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) | ||
23 | { | ||
24 | lockdep_assert_held_write(&inode->i_rwsem); | ||
25 | |||
26 | if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) { | ||
27 | spin_lock(&ci->i_ceph_lock); | ||
28 | ci->i_ceph_flags &= ~CEPH_I_ODIRECT; | ||
29 | spin_unlock(&ci->i_ceph_lock); | ||
30 | inode_dio_wait(inode); | ||
31 | } | ||
32 | } | ||
33 | |||
34 | /** | ||
35 | * ceph_start_io_read - declare the file is being used for buffered reads | ||
36 | * @inode: file inode | ||
37 | * | ||
38 | * Declare that a buffered read operation is about to start, and ensure | ||
39 | * that we block all direct I/O. | ||
40 | * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset, | ||
41 | * and holds a shared lock on inode->i_rwsem to ensure that the flag | ||
42 | * cannot be changed. | ||
43 | * In practice, this means that buffered read operations are allowed to | ||
44 | * execute in parallel, thanks to the shared lock, whereas direct I/O | ||
45 | * operations need to wait to grab an exclusive lock in order to set | ||
46 | * CEPH_I_ODIRECT. | ||
47 | * Note that buffered writes and truncates both take a write lock on | ||
48 | * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. | ||
49 | */ | ||
50 | void | ||
51 | ceph_start_io_read(struct inode *inode) | ||
52 | { | ||
53 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
54 | |||
55 | /* Be an optimist! */ | ||
56 | down_read(&inode->i_rwsem); | ||
57 | if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) | ||
58 | return; | ||
59 | up_read(&inode->i_rwsem); | ||
60 | /* Slow path.... */ | ||
61 | down_write(&inode->i_rwsem); | ||
62 | ceph_block_o_direct(ci, inode); | ||
63 | downgrade_write(&inode->i_rwsem); | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * ceph_end_io_read - declare that the buffered read operation is done | ||
68 | * @inode: file inode | ||
69 | * | ||
70 | * Declare that a buffered read operation is done, and release the shared | ||
71 | * lock on inode->i_rwsem. | ||
72 | */ | ||
73 | void | ||
74 | ceph_end_io_read(struct inode *inode) | ||
75 | { | ||
76 | up_read(&inode->i_rwsem); | ||
77 | } | ||
78 | |||
79 | /** | ||
80 | * ceph_start_io_write - declare the file is being used for buffered writes | ||
81 | * @inode: file inode | ||
82 | * | ||
83 | * Declare that a buffered write operation is about to start, and ensure | ||
84 | * that we block all direct I/O. | ||
85 | */ | ||
86 | void | ||
87 | ceph_start_io_write(struct inode *inode) | ||
88 | { | ||
89 | down_write(&inode->i_rwsem); | ||
90 | ceph_block_o_direct(ceph_inode(inode), inode); | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * ceph_end_io_write - declare that the buffered write operation is done | ||
95 | * @inode: file inode | ||
96 | * | ||
97 | * Declare that a buffered write operation is done, and release the | ||
98 | * lock on inode->i_rwsem. | ||
99 | */ | ||
100 | void | ||
101 | ceph_end_io_write(struct inode *inode) | ||
102 | { | ||
103 | up_write(&inode->i_rwsem); | ||
104 | } | ||
105 | |||
106 | /* Call with exclusively locked inode->i_rwsem */ | ||
107 | static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) | ||
108 | { | ||
109 | lockdep_assert_held_write(&inode->i_rwsem); | ||
110 | |||
111 | if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) { | ||
112 | spin_lock(&ci->i_ceph_lock); | ||
113 | ci->i_ceph_flags |= CEPH_I_ODIRECT; | ||
114 | spin_unlock(&ci->i_ceph_lock); | ||
115 | /* FIXME: unmap_mapping_range? */ | ||
116 | filemap_write_and_wait(inode->i_mapping); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * ceph_start_io_direct - declare the file is being used for direct i/o | ||
122 | * @inode: file inode | ||
123 | * | ||
124 | * Declare that a direct I/O operation is about to start, and ensure | ||
125 | * that we block all buffered I/O. | ||
126 | * On exit, the function ensures that the CEPH_I_ODIRECT flag is set, | ||
127 | * and holds a shared lock on inode->i_rwsem to ensure that the flag | ||
128 | * cannot be changed. | ||
129 | * In practice, this means that direct I/O operations are allowed to | ||
130 | * execute in parallel, thanks to the shared lock, whereas buffered I/O | ||
131 | * operations need to wait to grab an exclusive lock in order to clear | ||
132 | * CEPH_I_ODIRECT. | ||
133 | * Note that buffered writes and truncates both take a write lock on | ||
134 | * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. | ||
135 | */ | ||
136 | void | ||
137 | ceph_start_io_direct(struct inode *inode) | ||
138 | { | ||
139 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
140 | |||
141 | /* Be an optimist! */ | ||
142 | down_read(&inode->i_rwsem); | ||
143 | if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) | ||
144 | return; | ||
145 | up_read(&inode->i_rwsem); | ||
146 | /* Slow path.... */ | ||
147 | down_write(&inode->i_rwsem); | ||
148 | ceph_block_buffered(ci, inode); | ||
149 | downgrade_write(&inode->i_rwsem); | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * ceph_end_io_direct - declare that the direct i/o operation is done | ||
154 | * @inode: file inode | ||
155 | * | ||
156 | * Declare that a direct I/O operation is done, and release the shared | ||
157 | * lock on inode->i_rwsem. | ||
158 | */ | ||
159 | void | ||
160 | ceph_end_io_direct(struct inode *inode) | ||
161 | { | ||
162 | up_read(&inode->i_rwsem); | ||
163 | } | ||
diff --git a/fs/ceph/io.h b/fs/ceph/io.h new file mode 100644 index 000000000000..fa594cd77348 --- /dev/null +++ b/fs/ceph/io.h | |||
@@ -0,0 +1,12 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _FS_CEPH_IO_H | ||
3 | #define _FS_CEPH_IO_H | ||
4 | |||
5 | void ceph_start_io_read(struct inode *inode); | ||
6 | void ceph_end_io_read(struct inode *inode); | ||
7 | void ceph_start_io_write(struct inode *inode); | ||
8 | void ceph_end_io_write(struct inode *inode); | ||
9 | void ceph_start_io_direct(struct inode *inode); | ||
10 | void ceph_end_io_direct(struct inode *inode); | ||
11 | |||
12 | #endif /* _FS_CEPH_IO_H */ | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 03e4828c7635..98d7190289c8 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -513,10 +513,10 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, | |||
513 | #define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ | 513 | #define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ |
514 | #define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */ | 514 | #define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */ |
515 | #define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */ | 515 | #define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */ |
516 | #define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snaps */ | 516 | #define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snaps */ |
517 | #define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */ | 517 | #define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */ |
518 | #define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */ | 518 | #define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */ |
519 | 519 | #define CEPH_I_ODIRECT (1 << 12) /* inode in direct I/O mode */ | |
520 | 520 | ||
521 | /* | 521 | /* |
522 | * Masks of ceph inode work. | 522 | * Masks of ceph inode work. |