aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Li Wang <liwang@ubuntukylin.com> 2013-08-14 23:51:44 -0400
committer: Sage Weil <sage@inktank.com> 2013-08-15 14:12:17 -0400
commit: ad7a60de882aca31afb58721db166f7e77afcd92 (patch)
tree: 61a21b2249d77d575982786a04fbaca923361fc8 /fs/ceph
parent: 3871cbb9a41b1371dc13fc619e3ab4e0a1e29b4a (diff)
ceph: punch hole support
This patch implements fallocate and punch-hole support for the Ceph kernel client. Signed-off-by: Li Wang <liwang@ubuntukylin.com> Signed-off-by: Yunchuan Wen <yunchuanwen@ubuntukylin.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/file.c196
1 files changed, 196 insertions, 0 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index abc0e0759bdc..68af489c2abd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -8,6 +8,7 @@
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/math64.h>
11 12
12#include "super.h" 13#include "super.h"
13#include "mds_client.h" 14#include "mds_client.h"
@@ -874,6 +875,200 @@ out:
874 return offset; 875 return offset;
875} 876}
876 877
878static inline void ceph_zero_partial_page(
879 struct inode *inode, loff_t offset, unsigned size)
880{
881 struct page *page;
882 pgoff_t index = offset >> PAGE_CACHE_SHIFT;
883
884 page = find_lock_page(inode->i_mapping, index);
885 if (page) {
886 wait_on_page_writeback(page);
887 zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
888 unlock_page(page);
889 page_cache_release(page);
890 }
891}
892
893static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
894 loff_t length)
895{
896 loff_t nearly = round_up(offset, PAGE_CACHE_SIZE);
897 if (offset < nearly) {
898 loff_t size = nearly - offset;
899 if (length < size)
900 size = length;
901 ceph_zero_partial_page(inode, offset, size);
902 offset += size;
903 length -= size;
904 }
905 if (length >= PAGE_CACHE_SIZE) {
906 loff_t size = round_down(length, PAGE_CACHE_SIZE);
907 truncate_pagecache_range(inode, offset, offset + size - 1);
908 offset += size;
909 length -= size;
910 }
911 if (length)
912 ceph_zero_partial_page(inode, offset, length);
913}
914
915static int ceph_zero_partial_object(struct inode *inode,
916 loff_t offset, loff_t *length)
917{
918 struct ceph_inode_info *ci = ceph_inode(inode);
919 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
920 struct ceph_osd_request *req;
921 int ret = 0;
922 loff_t zero = 0;
923 int op;
924
925 if (!length) {
926 op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
927 length = &zero;
928 } else {
929 op = CEPH_OSD_OP_ZERO;
930 }
931
932 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
933 ceph_vino(inode),
934 offset, length,
935 1, op,
936 CEPH_OSD_FLAG_WRITE |
937 CEPH_OSD_FLAG_ONDISK,
938 NULL, 0, 0, false);
939 if (IS_ERR(req)) {
940 ret = PTR_ERR(req);
941 goto out;
942 }
943
944 ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap,
945 &inode->i_mtime);
946
947 ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
948 if (!ret) {
949 ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
950 if (ret == -ENOENT)
951 ret = 0;
952 }
953 ceph_osdc_put_request(req);
954
955out:
956 return ret;
957}
958
959static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
960{
961 int ret = 0;
962 struct ceph_inode_info *ci = ceph_inode(inode);
963 __s32 stripe_unit = ceph_file_layout_su(ci->i_layout);
964 __s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
965 __s32 object_size = ceph_file_layout_object_size(ci->i_layout);
966 loff_t object_set_size = (loff_t)object_size * stripe_count;
967
968 loff_t nearly = (offset + object_set_size - 1)
969 / object_set_size * object_set_size;
970 while (length && offset < nearly) {
971 loff_t size = length;
972 ret = ceph_zero_partial_object(inode, offset, &size);
973 if (ret < 0)
974 return ret;
975 offset += size;
976 length -= size;
977 }
978 while (length >= object_set_size) {
979 int i;
980 loff_t pos = offset;
981 for (i = 0; i < stripe_count; ++i) {
982 ret = ceph_zero_partial_object(inode, pos, NULL);
983 if (ret < 0)
984 return ret;
985 pos += stripe_unit;
986 }
987 offset += object_set_size;
988 length -= object_set_size;
989 }
990 while (length) {
991 loff_t size = length;
992 ret = ceph_zero_partial_object(inode, offset, &size);
993 if (ret < 0)
994 return ret;
995 offset += size;
996 length -= size;
997 }
998 return ret;
999}
1000
1001static long ceph_fallocate(struct file *file, int mode,
1002 loff_t offset, loff_t length)
1003{
1004 struct ceph_file_info *fi = file->private_data;
1005 struct inode *inode = file->f_dentry->d_inode;
1006 struct ceph_inode_info *ci = ceph_inode(inode);
1007 struct ceph_osd_client *osdc =
1008 &ceph_inode_to_client(inode)->client->osdc;
1009 int want, got = 0;
1010 int dirty;
1011 int ret = 0;
1012 loff_t endoff = 0;
1013 loff_t size;
1014
1015 if (!S_ISREG(inode->i_mode))
1016 return -EOPNOTSUPP;
1017
1018 if (IS_SWAPFILE(inode))
1019 return -ETXTBSY;
1020
1021 mutex_lock(&inode->i_mutex);
1022
1023 if (ceph_snap(inode) != CEPH_NOSNAP) {
1024 ret = -EROFS;
1025 goto unlock;
1026 }
1027
1028 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
1029 !(mode & FALLOC_FL_PUNCH_HOLE)) {
1030 ret = -ENOSPC;
1031 goto unlock;
1032 }
1033
1034 size = i_size_read(inode);
1035 if (!(mode & FALLOC_FL_KEEP_SIZE))
1036 endoff = offset + length;
1037
1038 if (fi->fmode & CEPH_FILE_MODE_LAZY)
1039 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
1040 else
1041 want = CEPH_CAP_FILE_BUFFER;
1042
1043 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
1044 if (ret < 0)
1045 goto unlock;
1046
1047 if (mode & FALLOC_FL_PUNCH_HOLE) {
1048 if (offset < size)
1049 ceph_zero_pagecache_range(inode, offset, length);
1050 ret = ceph_zero_objects(inode, offset, length);
1051 } else if (endoff > size) {
1052 truncate_pagecache_range(inode, size, -1);
1053 if (ceph_inode_set_size(inode, endoff))
1054 ceph_check_caps(ceph_inode(inode),
1055 CHECK_CAPS_AUTHONLY, NULL);
1056 }
1057
1058 if (!ret) {
1059 spin_lock(&ci->i_ceph_lock);
1060 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1061 spin_unlock(&ci->i_ceph_lock);
1062 if (dirty)
1063 __mark_inode_dirty(inode, dirty);
1064 }
1065
1066 ceph_put_cap_refs(ci, got);
1067unlock:
1068 mutex_unlock(&inode->i_mutex);
1069 return ret;
1070}
1071
877const struct file_operations ceph_file_fops = { 1072const struct file_operations ceph_file_fops = {
878 .open = ceph_open, 1073 .open = ceph_open,
879 .release = ceph_release, 1074 .release = ceph_release,
@@ -890,5 +1085,6 @@ const struct file_operations ceph_file_fops = {
890 .splice_write = generic_file_splice_write, 1085 .splice_write = generic_file_splice_write,
891 .unlocked_ioctl = ceph_ioctl, 1086 .unlocked_ioctl = ceph_ioctl,
892 .compat_ioctl = ceph_ioctl, 1087 .compat_ioctl = ceph_ioctl,
1088 .fallocate = ceph_fallocate,
893}; 1089};
894 1090