diff options
author | Li Wang <liwang@ubuntukylin.com> | 2013-08-14 23:51:44 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-08-15 14:12:17 -0400 |
commit | ad7a60de882aca31afb58721db166f7e77afcd92 (patch) | |
tree | 61a21b2249d77d575982786a04fbaca923361fc8 | |
parent | 3871cbb9a41b1371dc13fc619e3ab4e0a1e29b4a (diff) |
ceph: punch hole support
This patch implements fallocate and punch hole support for the Ceph kernel client.
Signed-off-by: Li Wang <liwang@ubuntukylin.com>
Signed-off-by: Yunchuan Wen <yunchuanwen@ubuntukylin.com>
-rw-r--r-- | fs/ceph/file.c | 196 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 11 |
2 files changed, 205 insertions, 2 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index abc0e0759bdc..68af489c2abd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -8,6 +8,7 @@ | |||
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/math64.h>

#include "super.h"
#include "mds_client.h"
@@ -874,6 +875,200 @@ out: | |||
874 | return offset; | 875 | return offset; |
875 | } | 876 | } |
876 | 877 | ||
878 | static inline void ceph_zero_partial_page( | ||
879 | struct inode *inode, loff_t offset, unsigned size) | ||
880 | { | ||
881 | struct page *page; | ||
882 | pgoff_t index = offset >> PAGE_CACHE_SHIFT; | ||
883 | |||
884 | page = find_lock_page(inode->i_mapping, index); | ||
885 | if (page) { | ||
886 | wait_on_page_writeback(page); | ||
887 | zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size); | ||
888 | unlock_page(page); | ||
889 | page_cache_release(page); | ||
890 | } | ||
891 | } | ||
892 | |||
893 | static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset, | ||
894 | loff_t length) | ||
895 | { | ||
896 | loff_t nearly = round_up(offset, PAGE_CACHE_SIZE); | ||
897 | if (offset < nearly) { | ||
898 | loff_t size = nearly - offset; | ||
899 | if (length < size) | ||
900 | size = length; | ||
901 | ceph_zero_partial_page(inode, offset, size); | ||
902 | offset += size; | ||
903 | length -= size; | ||
904 | } | ||
905 | if (length >= PAGE_CACHE_SIZE) { | ||
906 | loff_t size = round_down(length, PAGE_CACHE_SIZE); | ||
907 | truncate_pagecache_range(inode, offset, offset + size - 1); | ||
908 | offset += size; | ||
909 | length -= size; | ||
910 | } | ||
911 | if (length) | ||
912 | ceph_zero_partial_page(inode, offset, length); | ||
913 | } | ||
914 | |||
915 | static int ceph_zero_partial_object(struct inode *inode, | ||
916 | loff_t offset, loff_t *length) | ||
917 | { | ||
918 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
919 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||
920 | struct ceph_osd_request *req; | ||
921 | int ret = 0; | ||
922 | loff_t zero = 0; | ||
923 | int op; | ||
924 | |||
925 | if (!length) { | ||
926 | op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE; | ||
927 | length = &zero; | ||
928 | } else { | ||
929 | op = CEPH_OSD_OP_ZERO; | ||
930 | } | ||
931 | |||
932 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
933 | ceph_vino(inode), | ||
934 | offset, length, | ||
935 | 1, op, | ||
936 | CEPH_OSD_FLAG_WRITE | | ||
937 | CEPH_OSD_FLAG_ONDISK, | ||
938 | NULL, 0, 0, false); | ||
939 | if (IS_ERR(req)) { | ||
940 | ret = PTR_ERR(req); | ||
941 | goto out; | ||
942 | } | ||
943 | |||
944 | ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap, | ||
945 | &inode->i_mtime); | ||
946 | |||
947 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||
948 | if (!ret) { | ||
949 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||
950 | if (ret == -ENOENT) | ||
951 | ret = 0; | ||
952 | } | ||
953 | ceph_osdc_put_request(req); | ||
954 | |||
955 | out: | ||
956 | return ret; | ||
957 | } | ||
958 | |||
959 | static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length) | ||
960 | { | ||
961 | int ret = 0; | ||
962 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
963 | __s32 stripe_unit = ceph_file_layout_su(ci->i_layout); | ||
964 | __s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout); | ||
965 | __s32 object_size = ceph_file_layout_object_size(ci->i_layout); | ||
966 | loff_t object_set_size = (loff_t)object_size * stripe_count; | ||
967 | |||
968 | loff_t nearly = (offset + object_set_size - 1) | ||
969 | / object_set_size * object_set_size; | ||
970 | while (length && offset < nearly) { | ||
971 | loff_t size = length; | ||
972 | ret = ceph_zero_partial_object(inode, offset, &size); | ||
973 | if (ret < 0) | ||
974 | return ret; | ||
975 | offset += size; | ||
976 | length -= size; | ||
977 | } | ||
978 | while (length >= object_set_size) { | ||
979 | int i; | ||
980 | loff_t pos = offset; | ||
981 | for (i = 0; i < stripe_count; ++i) { | ||
982 | ret = ceph_zero_partial_object(inode, pos, NULL); | ||
983 | if (ret < 0) | ||
984 | return ret; | ||
985 | pos += stripe_unit; | ||
986 | } | ||
987 | offset += object_set_size; | ||
988 | length -= object_set_size; | ||
989 | } | ||
990 | while (length) { | ||
991 | loff_t size = length; | ||
992 | ret = ceph_zero_partial_object(inode, offset, &size); | ||
993 | if (ret < 0) | ||
994 | return ret; | ||
995 | offset += size; | ||
996 | length -= size; | ||
997 | } | ||
998 | return ret; | ||
999 | } | ||
1000 | |||
1001 | static long ceph_fallocate(struct file *file, int mode, | ||
1002 | loff_t offset, loff_t length) | ||
1003 | { | ||
1004 | struct ceph_file_info *fi = file->private_data; | ||
1005 | struct inode *inode = file->f_dentry->d_inode; | ||
1006 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1007 | struct ceph_osd_client *osdc = | ||
1008 | &ceph_inode_to_client(inode)->client->osdc; | ||
1009 | int want, got = 0; | ||
1010 | int dirty; | ||
1011 | int ret = 0; | ||
1012 | loff_t endoff = 0; | ||
1013 | loff_t size; | ||
1014 | |||
1015 | if (!S_ISREG(inode->i_mode)) | ||
1016 | return -EOPNOTSUPP; | ||
1017 | |||
1018 | if (IS_SWAPFILE(inode)) | ||
1019 | return -ETXTBSY; | ||
1020 | |||
1021 | mutex_lock(&inode->i_mutex); | ||
1022 | |||
1023 | if (ceph_snap(inode) != CEPH_NOSNAP) { | ||
1024 | ret = -EROFS; | ||
1025 | goto unlock; | ||
1026 | } | ||
1027 | |||
1028 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) && | ||
1029 | !(mode & FALLOC_FL_PUNCH_HOLE)) { | ||
1030 | ret = -ENOSPC; | ||
1031 | goto unlock; | ||
1032 | } | ||
1033 | |||
1034 | size = i_size_read(inode); | ||
1035 | if (!(mode & FALLOC_FL_KEEP_SIZE)) | ||
1036 | endoff = offset + length; | ||
1037 | |||
1038 | if (fi->fmode & CEPH_FILE_MODE_LAZY) | ||
1039 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; | ||
1040 | else | ||
1041 | want = CEPH_CAP_FILE_BUFFER; | ||
1042 | |||
1043 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||
1044 | if (ret < 0) | ||
1045 | goto unlock; | ||
1046 | |||
1047 | if (mode & FALLOC_FL_PUNCH_HOLE) { | ||
1048 | if (offset < size) | ||
1049 | ceph_zero_pagecache_range(inode, offset, length); | ||
1050 | ret = ceph_zero_objects(inode, offset, length); | ||
1051 | } else if (endoff > size) { | ||
1052 | truncate_pagecache_range(inode, size, -1); | ||
1053 | if (ceph_inode_set_size(inode, endoff)) | ||
1054 | ceph_check_caps(ceph_inode(inode), | ||
1055 | CHECK_CAPS_AUTHONLY, NULL); | ||
1056 | } | ||
1057 | |||
1058 | if (!ret) { | ||
1059 | spin_lock(&ci->i_ceph_lock); | ||
1060 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
1061 | spin_unlock(&ci->i_ceph_lock); | ||
1062 | if (dirty) | ||
1063 | __mark_inode_dirty(inode, dirty); | ||
1064 | } | ||
1065 | |||
1066 | ceph_put_cap_refs(ci, got); | ||
1067 | unlock: | ||
1068 | mutex_unlock(&inode->i_mutex); | ||
1069 | return ret; | ||
1070 | } | ||
1071 | |||
877 | const struct file_operations ceph_file_fops = { | 1072 | const struct file_operations ceph_file_fops = { |
878 | .open = ceph_open, | 1073 | .open = ceph_open, |
879 | .release = ceph_release, | 1074 | .release = ceph_release, |
@@ -890,5 +1085,6 @@ const struct file_operations ceph_file_fops = { | |||
890 | .splice_write = generic_file_splice_write, | 1085 | .splice_write = generic_file_splice_write, |
891 | .unlocked_ioctl = ceph_ioctl, | 1086 | .unlocked_ioctl = ceph_ioctl, |
892 | .compat_ioctl = ceph_ioctl, | 1087 | .compat_ioctl = ceph_ioctl, |
1088 | .fallocate = ceph_fallocate, | ||
893 | }; | 1089 | }; |
894 | 1090 | ||
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index dbc0a7392d67..8ec65bc11c71 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -503,7 +503,9 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, | |||
503 | struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); | 503 | struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); |
504 | size_t payload_len = 0; | 504 | size_t payload_len = 0; |
505 | 505 | ||
506 | BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); | 506 | BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && |
507 | opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && | ||
508 | opcode != CEPH_OSD_OP_TRUNCATE); | ||
507 | 509 | ||
508 | op->extent.offset = offset; | 510 | op->extent.offset = offset; |
509 | op->extent.length = length; | 511 | op->extent.length = length; |
@@ -631,6 +633,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, | |||
631 | break; | 633 | break; |
632 | case CEPH_OSD_OP_READ: | 634 | case CEPH_OSD_OP_READ: |
633 | case CEPH_OSD_OP_WRITE: | 635 | case CEPH_OSD_OP_WRITE: |
636 | case CEPH_OSD_OP_ZERO: | ||
637 | case CEPH_OSD_OP_DELETE: | ||
638 | case CEPH_OSD_OP_TRUNCATE: | ||
634 | if (src->op == CEPH_OSD_OP_WRITE) | 639 | if (src->op == CEPH_OSD_OP_WRITE) |
635 | request_data_len = src->extent.length; | 640 | request_data_len = src->extent.length; |
636 | dst->extent.offset = cpu_to_le64(src->extent.offset); | 641 | dst->extent.offset = cpu_to_le64(src->extent.offset); |
@@ -715,7 +720,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
715 | u64 object_base; | 720 | u64 object_base; |
716 | int r; | 721 | int r; |
717 | 722 | ||
718 | BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); | 723 | BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && |
724 | opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && | ||
725 | opcode != CEPH_OSD_OP_TRUNCATE); | ||
719 | 726 | ||
720 | req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, | 727 | req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, |
721 | GFP_NOFS); | 728 | GFP_NOFS); |