aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/file.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2014-12-17 19:03:12 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2014-12-17 19:03:12 -0500
commit: 57666509b70030a9483d13222bfec8eec5db07df (patch)
tree: 1e0021c2aabc2ce8832e8c816e2aa94b0b77a323 /fs/ceph/file.c
parent: 87c31b39abcb6fb6bd7d111200c9627a594bf6a9 (diff)
parent: 0aeff37abada9f8c08d2b10481a43d3ae406c823 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil:
"The big item here is support for inline data for CephFS and for message signatures from Zheng. There are also several bug fixes, including interrupted flock request handling, 0-length xattrs, mksnap, cached readdir results, and a message version compat field. Finally there are several cleanups from Ilya, Dan, and Markus.
Note that there is another series coming soon that fixes some bugs in the RBD 'lingering' requests, but it isn't quite ready yet"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (27 commits)
  ceph: fix setting empty extended attribute
  ceph: fix mksnap crash
  ceph: do_sync is never initialized
  libceph: fixup includes in pagelist.h
  ceph: support inline data feature
  ceph: flush inline version
  ceph: convert inline data to normal data before data write
  ceph: sync read inline data
  ceph: fetch inline data when getting Fcr cap refs
  ceph: use getattr request to fetch inline data
  ceph: add inline data to pagecache
  ceph: parse inline data in MClientReply and MClientCaps
  libceph: specify position of extent operation
  libceph: add CREATE osd operation support
  libceph: add SETXATTR/CMPXATTR osd operations support
  rbd: don't treat CEPH_OSD_OP_DELETE as extent op
  ceph: remove unused stringification macros
  libceph: require cephx message signature by default
  ceph: introduce global empty snap context
  ceph: message versioning fixes
  ...
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- fs/ceph/file.c 97
1 files changed, 83 insertions, 14 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9f8e3572040e..ce74b394b49d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file)
333 return 0; 333 return 0;
334} 334}
335 335
336enum {
337 CHECK_EOF = 1,
338 READ_INLINE = 2,
339};
340
336/* 341/*
337 * Read a range of bytes striped over one or more objects. Iterate over 342 * Read a range of bytes striped over one or more objects. Iterate over
338 * objects we stripe over. (That's not atomic, but good enough for now.) 343 * objects we stripe over. (That's not atomic, but good enough for now.)
@@ -412,7 +417,7 @@ more:
412 ret = read; 417 ret = read;
413 /* did we bounce off eof? */ 418 /* did we bounce off eof? */
414 if (pos + left > inode->i_size) 419 if (pos + left > inode->i_size)
415 *checkeof = 1; 420 *checkeof = CHECK_EOF;
416 } 421 }
417 422
418 dout("striped_read returns %d\n", ret); 423 dout("striped_read returns %d\n", ret);
@@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
598 snapc = ci->i_snap_realm->cached_context; 603 snapc = ci->i_snap_realm->cached_context;
599 vino = ceph_vino(inode); 604 vino = ceph_vino(inode);
600 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 605 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
601 vino, pos, &len, 606 vino, pos, &len, 0,
602 2,/*include a 'startsync' command*/ 607 2,/*include a 'startsync' command*/
603 CEPH_OSD_OP_WRITE, flags, snapc, 608 CEPH_OSD_OP_WRITE, flags, snapc,
604 ci->i_truncate_seq, 609 ci->i_truncate_seq,
@@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
609 break; 614 break;
610 } 615 }
611 616
617 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
618
612 n = iov_iter_get_pages_alloc(from, &pages, len, &start); 619 n = iov_iter_get_pages_alloc(from, &pages, len, &start);
613 if (unlikely(n < 0)) { 620 if (unlikely(n < 0)) {
614 ret = n; 621 ret = n;
@@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
713 snapc = ci->i_snap_realm->cached_context; 720 snapc = ci->i_snap_realm->cached_context;
714 vino = ceph_vino(inode); 721 vino = ceph_vino(inode);
715 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 722 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
716 vino, pos, &len, 1, 723 vino, pos, &len, 0, 1,
717 CEPH_OSD_OP_WRITE, flags, snapc, 724 CEPH_OSD_OP_WRITE, flags, snapc,
718 ci->i_truncate_seq, 725 ci->i_truncate_seq,
719 ci->i_truncate_size, 726 ci->i_truncate_size,
@@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
803 size_t len = iocb->ki_nbytes; 810 size_t len = iocb->ki_nbytes;
804 struct inode *inode = file_inode(filp); 811 struct inode *inode = file_inode(filp);
805 struct ceph_inode_info *ci = ceph_inode(inode); 812 struct ceph_inode_info *ci = ceph_inode(inode);
813 struct page *pinned_page = NULL;
806 ssize_t ret; 814 ssize_t ret;
807 int want, got = 0; 815 int want, got = 0;
808 int checkeof = 0, read = 0; 816 int retry_op = 0, read = 0;
809 817
810again: 818again:
811 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", 819 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -815,7 +823,7 @@ again:
815 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; 823 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
816 else 824 else
817 want = CEPH_CAP_FILE_CACHE; 825 want = CEPH_CAP_FILE_CACHE;
818 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); 826 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
819 if (ret < 0) 827 if (ret < 0)
820 return ret; 828 return ret;
821 829
@@ -827,8 +835,12 @@ again:
827 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 835 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
828 ceph_cap_string(got)); 836 ceph_cap_string(got));
829 837
830 /* hmm, this isn't really async... */ 838 if (ci->i_inline_version == CEPH_INLINE_NONE) {
831 ret = ceph_sync_read(iocb, to, &checkeof); 839 /* hmm, this isn't really async... */
840 ret = ceph_sync_read(iocb, to, &retry_op);
841 } else {
842 retry_op = READ_INLINE;
843 }
832 } else { 844 } else {
833 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 845 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
834 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 846 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
@@ -838,13 +850,55 @@ again:
838 } 850 }
839 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 851 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
840 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 852 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
853 if (pinned_page) {
854 page_cache_release(pinned_page);
855 pinned_page = NULL;
856 }
841 ceph_put_cap_refs(ci, got); 857 ceph_put_cap_refs(ci, got);
858 if (retry_op && ret >= 0) {
859 int statret;
860 struct page *page = NULL;
861 loff_t i_size;
862 if (retry_op == READ_INLINE) {
863 page = __page_cache_alloc(GFP_NOFS);
864 if (!page)
865 return -ENOMEM;
866 }
842 867
843 if (checkeof && ret >= 0) { 868 statret = __ceph_do_getattr(inode, page,
844 int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); 869 CEPH_STAT_CAP_INLINE_DATA, !!page);
870 if (statret < 0) {
871 __free_page(page);
872 if (statret == -ENODATA) {
873 BUG_ON(retry_op != READ_INLINE);
874 goto again;
875 }
876 return statret;
877 }
878
879 i_size = i_size_read(inode);
880 if (retry_op == READ_INLINE) {
881 /* does not support inline data > PAGE_SIZE */
882 if (i_size > PAGE_CACHE_SIZE) {
883 ret = -EIO;
884 } else if (iocb->ki_pos < i_size) {
885 loff_t end = min_t(loff_t, i_size,
886 iocb->ki_pos + len);
887 if (statret < end)
888 zero_user_segment(page, statret, end);
889 ret = copy_page_to_iter(page,
890 iocb->ki_pos & ~PAGE_MASK,
891 end - iocb->ki_pos, to);
892 iocb->ki_pos += ret;
893 } else {
894 ret = 0;
895 }
896 __free_pages(page, 0);
897 return ret;
898 }
845 899
846 /* hit EOF or hole? */ 900 /* hit EOF or hole? */
847 if (statret == 0 && iocb->ki_pos < inode->i_size && 901 if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
848 ret < len) { 902 ret < len) {
849 dout("sync_read hit hole, ppos %lld < size %lld" 903 dout("sync_read hit hole, ppos %lld < size %lld"
850 ", reading more\n", iocb->ki_pos, 904 ", reading more\n", iocb->ki_pos,
@@ -852,7 +906,7 @@ again:
852 906
853 read += ret; 907 read += ret;
854 len -= ret; 908 len -= ret;
855 checkeof = 0; 909 retry_op = 0;
856 goto again; 910 goto again;
857 } 911 }
858 } 912 }
@@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
909 if (err) 963 if (err)
910 goto out; 964 goto out;
911 965
966 if (ci->i_inline_version != CEPH_INLINE_NONE) {
967 err = ceph_uninline_data(file, NULL);
968 if (err < 0)
969 goto out;
970 }
971
912retry_snap: 972retry_snap:
913 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { 973 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
914 err = -ENOSPC; 974 err = -ENOSPC;
@@ -922,7 +982,8 @@ retry_snap:
922 else 982 else
923 want = CEPH_CAP_FILE_BUFFER; 983 want = CEPH_CAP_FILE_BUFFER;
924 got = 0; 984 got = 0;
925 err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); 985 err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
986 &got, NULL);
926 if (err < 0) 987 if (err < 0)
927 goto out; 988 goto out;
928 989
@@ -969,6 +1030,7 @@ retry_snap:
969 if (written >= 0) { 1030 if (written >= 0) {
970 int dirty; 1031 int dirty;
971 spin_lock(&ci->i_ceph_lock); 1032 spin_lock(&ci->i_ceph_lock);
1033 ci->i_inline_version = CEPH_INLINE_NONE;
972 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1034 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
973 spin_unlock(&ci->i_ceph_lock); 1035 spin_unlock(&ci->i_ceph_lock);
974 if (dirty) 1036 if (dirty)
@@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode,
1111 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 1173 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
1112 ceph_vino(inode), 1174 ceph_vino(inode),
1113 offset, length, 1175 offset, length,
1114 1, op, 1176 0, 1, op,
1115 CEPH_OSD_FLAG_WRITE | 1177 CEPH_OSD_FLAG_WRITE |
1116 CEPH_OSD_FLAG_ONDISK, 1178 CEPH_OSD_FLAG_ONDISK,
1117 NULL, 0, 0, false); 1179 NULL, 0, 0, false);
@@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode,
1214 goto unlock; 1276 goto unlock;
1215 } 1277 }
1216 1278
1279 if (ci->i_inline_version != CEPH_INLINE_NONE) {
1280 ret = ceph_uninline_data(file, NULL);
1281 if (ret < 0)
1282 goto unlock;
1283 }
1284
1217 size = i_size_read(inode); 1285 size = i_size_read(inode);
1218 if (!(mode & FALLOC_FL_KEEP_SIZE)) 1286 if (!(mode & FALLOC_FL_KEEP_SIZE))
1219 endoff = offset + length; 1287 endoff = offset + length;
@@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode,
1223 else 1291 else
1224 want = CEPH_CAP_FILE_BUFFER; 1292 want = CEPH_CAP_FILE_BUFFER;
1225 1293
1226 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); 1294 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
1227 if (ret < 0) 1295 if (ret < 0)
1228 goto unlock; 1296 goto unlock;
1229 1297
@@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode,
1240 1308
1241 if (!ret) { 1309 if (!ret) {
1242 spin_lock(&ci->i_ceph_lock); 1310 spin_lock(&ci->i_ceph_lock);
1311 ci->i_inline_version = CEPH_INLINE_NONE;
1243 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1312 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1244 spin_unlock(&ci->i_ceph_lock); 1313 spin_unlock(&ci->i_ceph_lock);
1245 if (dirty) 1314 if (dirty)