aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Li Wang <liwang@ubuntukylin.com> 2013-08-14 23:51:44 -0400
committer Sage Weil <sage@inktank.com> 2013-08-15 14:12:17 -0400
commit ad7a60de882aca31afb58721db166f7e77afcd92 (patch)
tree 61a21b2249d77d575982786a04fbaca923361fc8
parent 3871cbb9a41b1371dc13fc619e3ab4e0a1e29b4a (diff)
ceph: punch hole support
This patch implements fallocate and punch-hole support for the Ceph kernel client.

Signed-off-by: Li Wang <liwang@ubuntukylin.com>
Signed-off-by: Yunchuan Wen <yunchuanwen@ubuntukylin.com>
-rw-r--r--fs/ceph/file.c196
-rw-r--r--net/ceph/osd_client.c11
2 files changed, 205 insertions, 2 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index abc0e0759bdc..68af489c2abd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -8,6 +8,7 @@
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/math64.h>

#include "super.h"
#include "mds_client.h"
@@ -874,6 +875,200 @@ out:
874 return offset; 875 return offset;
875} 876}
876 877
878static inline void ceph_zero_partial_page(
879 struct inode *inode, loff_t offset, unsigned size)
880{
881 struct page *page;
882 pgoff_t index = offset >> PAGE_CACHE_SHIFT;
883
884 page = find_lock_page(inode->i_mapping, index);
885 if (page) {
886 wait_on_page_writeback(page);
887 zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
888 unlock_page(page);
889 page_cache_release(page);
890 }
891}
892
893static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
894 loff_t length)
895{
896 loff_t nearly = round_up(offset, PAGE_CACHE_SIZE);
897 if (offset < nearly) {
898 loff_t size = nearly - offset;
899 if (length < size)
900 size = length;
901 ceph_zero_partial_page(inode, offset, size);
902 offset += size;
903 length -= size;
904 }
905 if (length >= PAGE_CACHE_SIZE) {
906 loff_t size = round_down(length, PAGE_CACHE_SIZE);
907 truncate_pagecache_range(inode, offset, offset + size - 1);
908 offset += size;
909 length -= size;
910 }
911 if (length)
912 ceph_zero_partial_page(inode, offset, length);
913}
914
915static int ceph_zero_partial_object(struct inode *inode,
916 loff_t offset, loff_t *length)
917{
918 struct ceph_inode_info *ci = ceph_inode(inode);
919 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
920 struct ceph_osd_request *req;
921 int ret = 0;
922 loff_t zero = 0;
923 int op;
924
925 if (!length) {
926 op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
927 length = &zero;
928 } else {
929 op = CEPH_OSD_OP_ZERO;
930 }
931
932 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
933 ceph_vino(inode),
934 offset, length,
935 1, op,
936 CEPH_OSD_FLAG_WRITE |
937 CEPH_OSD_FLAG_ONDISK,
938 NULL, 0, 0, false);
939 if (IS_ERR(req)) {
940 ret = PTR_ERR(req);
941 goto out;
942 }
943
944 ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap,
945 &inode->i_mtime);
946
947 ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
948 if (!ret) {
949 ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
950 if (ret == -ENOENT)
951 ret = 0;
952 }
953 ceph_osdc_put_request(req);
954
955out:
956 return ret;
957}
958
959static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
960{
961 int ret = 0;
962 struct ceph_inode_info *ci = ceph_inode(inode);
963 __s32 stripe_unit = ceph_file_layout_su(ci->i_layout);
964 __s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
965 __s32 object_size = ceph_file_layout_object_size(ci->i_layout);
966 loff_t object_set_size = (loff_t)object_size * stripe_count;
967
968 loff_t nearly = (offset + object_set_size - 1)
969 / object_set_size * object_set_size;
970 while (length && offset < nearly) {
971 loff_t size = length;
972 ret = ceph_zero_partial_object(inode, offset, &size);
973 if (ret < 0)
974 return ret;
975 offset += size;
976 length -= size;
977 }
978 while (length >= object_set_size) {
979 int i;
980 loff_t pos = offset;
981 for (i = 0; i < stripe_count; ++i) {
982 ret = ceph_zero_partial_object(inode, pos, NULL);
983 if (ret < 0)
984 return ret;
985 pos += stripe_unit;
986 }
987 offset += object_set_size;
988 length -= object_set_size;
989 }
990 while (length) {
991 loff_t size = length;
992 ret = ceph_zero_partial_object(inode, offset, &size);
993 if (ret < 0)
994 return ret;
995 offset += size;
996 length -= size;
997 }
998 return ret;
999}
1000
1001static long ceph_fallocate(struct file *file, int mode,
1002 loff_t offset, loff_t length)
1003{
1004 struct ceph_file_info *fi = file->private_data;
1005 struct inode *inode = file->f_dentry->d_inode;
1006 struct ceph_inode_info *ci = ceph_inode(inode);
1007 struct ceph_osd_client *osdc =
1008 &ceph_inode_to_client(inode)->client->osdc;
1009 int want, got = 0;
1010 int dirty;
1011 int ret = 0;
1012 loff_t endoff = 0;
1013 loff_t size;
1014
1015 if (!S_ISREG(inode->i_mode))
1016 return -EOPNOTSUPP;
1017
1018 if (IS_SWAPFILE(inode))
1019 return -ETXTBSY;
1020
1021 mutex_lock(&inode->i_mutex);
1022
1023 if (ceph_snap(inode) != CEPH_NOSNAP) {
1024 ret = -EROFS;
1025 goto unlock;
1026 }
1027
1028 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
1029 !(mode & FALLOC_FL_PUNCH_HOLE)) {
1030 ret = -ENOSPC;
1031 goto unlock;
1032 }
1033
1034 size = i_size_read(inode);
1035 if (!(mode & FALLOC_FL_KEEP_SIZE))
1036 endoff = offset + length;
1037
1038 if (fi->fmode & CEPH_FILE_MODE_LAZY)
1039 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
1040 else
1041 want = CEPH_CAP_FILE_BUFFER;
1042
1043 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
1044 if (ret < 0)
1045 goto unlock;
1046
1047 if (mode & FALLOC_FL_PUNCH_HOLE) {
1048 if (offset < size)
1049 ceph_zero_pagecache_range(inode, offset, length);
1050 ret = ceph_zero_objects(inode, offset, length);
1051 } else if (endoff > size) {
1052 truncate_pagecache_range(inode, size, -1);
1053 if (ceph_inode_set_size(inode, endoff))
1054 ceph_check_caps(ceph_inode(inode),
1055 CHECK_CAPS_AUTHONLY, NULL);
1056 }
1057
1058 if (!ret) {
1059 spin_lock(&ci->i_ceph_lock);
1060 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1061 spin_unlock(&ci->i_ceph_lock);
1062 if (dirty)
1063 __mark_inode_dirty(inode, dirty);
1064 }
1065
1066 ceph_put_cap_refs(ci, got);
1067unlock:
1068 mutex_unlock(&inode->i_mutex);
1069 return ret;
1070}
1071
877const struct file_operations ceph_file_fops = { 1072const struct file_operations ceph_file_fops = {
878 .open = ceph_open, 1073 .open = ceph_open,
879 .release = ceph_release, 1074 .release = ceph_release,
@@ -890,5 +1085,6 @@ const struct file_operations ceph_file_fops = {
890 .splice_write = generic_file_splice_write, 1085 .splice_write = generic_file_splice_write,
891 .unlocked_ioctl = ceph_ioctl, 1086 .unlocked_ioctl = ceph_ioctl,
892 .compat_ioctl = ceph_ioctl, 1087 .compat_ioctl = ceph_ioctl,
1088 .fallocate = ceph_fallocate,
893}; 1089};
894 1090
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dbc0a7392d67..8ec65bc11c71 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -503,7 +503,9 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
504 size_t payload_len = 0; 504 size_t payload_len = 0;
505 505
506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); 506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
507 opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
508 opcode != CEPH_OSD_OP_TRUNCATE);
507 509
508 op->extent.offset = offset; 510 op->extent.offset = offset;
509 op->extent.length = length; 511 op->extent.length = length;
@@ -631,6 +633,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
631 break; 633 break;
632 case CEPH_OSD_OP_READ: 634 case CEPH_OSD_OP_READ:
633 case CEPH_OSD_OP_WRITE: 635 case CEPH_OSD_OP_WRITE:
636 case CEPH_OSD_OP_ZERO:
637 case CEPH_OSD_OP_DELETE:
638 case CEPH_OSD_OP_TRUNCATE:
634 if (src->op == CEPH_OSD_OP_WRITE) 639 if (src->op == CEPH_OSD_OP_WRITE)
635 request_data_len = src->extent.length; 640 request_data_len = src->extent.length;
636 dst->extent.offset = cpu_to_le64(src->extent.offset); 641 dst->extent.offset = cpu_to_le64(src->extent.offset);
@@ -715,7 +720,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
715 u64 object_base; 720 u64 object_base;
716 int r; 721 int r;
717 722
718 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); 723 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
724 opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
725 opcode != CEPH_OSD_OP_TRUNCATE);
719 726
720 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, 727 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
721 GFP_NOFS); 728 GFP_NOFS);