summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuis Henriques <lhenriques@suse.com>2018-01-05 05:47:18 -0500
committerIlya Dryomov <idryomov@gmail.com>2018-04-02 05:17:51 -0400
commitfb18a57568c2b84cd611e242c0f6fa97b45e4907 (patch)
tree0f447778c8ce55e583cd8cf8eb8e945d38b5c208
parent08a79102aa373e03ce704621fd84567605214465 (diff)
ceph: quota: add initial infrastructure to support cephfs quotas
This patch adds the infrastructure required to support cephfs quotas as it is currently implemented in the ceph fuse client. Cephfs quotas can be set on any directory, and can restrict the number of bytes or the number of files stored beneath that point in the directory hierarchy. Quotas are set using the extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', and can be removed by setting these attributes to '0'. Link: http://tracker.ceph.com/issues/22372 Signed-off-by: Luis Henriques <lhenriques@suse.com> Reviewed-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r--Documentation/filesystems/ceph.txt12
-rw-r--r--fs/ceph/Makefile2
-rw-r--r--fs/ceph/inode.c6
-rw-r--r--fs/ceph/mds_client.c23
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/quota.c65
-rw-r--r--fs/ceph/super.h8
-rw-r--r--fs/ceph/xattr.c44
-rw-r--r--include/linux/ceph/ceph_features.h1
-rw-r--r--include/linux/ceph/ceph_fs.h17
-rw-r--r--net/ceph/ceph_common.c1
11 files changed, 180 insertions, 1 deletions
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
index 0b302a11718a..094772481263 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.txt
@@ -62,6 +62,18 @@ subdirectories, and a summation of all nested file sizes. This makes
62the identification of large disk space consumers relatively quick, as 62the identification of large disk space consumers relatively quick, as
63no 'du' or similar recursive scan of the file system is required. 63no 'du' or similar recursive scan of the file system is required.
64 64
65Finally, Ceph also allows quotas to be set on any directory in the system.
66The quota can restrict the number of bytes or the number of files stored
67beneath that point in the directory hierarchy. Quotas can be set using
68extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', e.g.:
69
70 setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
71 getfattr -n ceph.quota.max_bytes /some/dir
72
73A limitation of the current quotas implementation is that it relies on the
74cooperation of the client mounting the file system to stop writers when a
75limit is reached. A modified or adversarial client cannot be prevented
76from writing as much data as it needs.
65 77
66Mount Syntax 78Mount Syntax
67============ 79============
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 174f5709e508..a699e320393f 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -6,7 +6,7 @@
6obj-$(CONFIG_CEPH_FS) += ceph.o 6obj-$(CONFIG_CEPH_FS) += ceph.o
7 7
8ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ 8ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
9 export.o caps.o snap.o xattr.o \ 9 export.o caps.o snap.o xattr.o quota.o \
10 mds_client.o mdsmap.o strings.o ceph_frag.o \ 10 mds_client.o mdsmap.o strings.o ceph_frag.o \
11 debugfs.o 11 debugfs.o
12 12
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be5f12d0d637..2c6f8be4ed63 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
441 atomic64_set(&ci->i_complete_seq[1], 0); 441 atomic64_set(&ci->i_complete_seq[1], 0);
442 ci->i_symlink = NULL; 442 ci->i_symlink = NULL;
443 443
444 ci->i_max_bytes = 0;
445 ci->i_max_files = 0;
446
444 memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); 447 memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
445 RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); 448 RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
446 449
@@ -790,6 +793,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
790 inode->i_rdev = le32_to_cpu(info->rdev); 793 inode->i_rdev = le32_to_cpu(info->rdev);
791 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 794 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
792 795
796 ci->i_max_bytes = iinfo->max_bytes;
797 ci->i_max_files = iinfo->max_files;
798
793 if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && 799 if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
794 (issued & CEPH_CAP_AUTH_EXCL) == 0) { 800 (issued & CEPH_CAP_AUTH_EXCL) == 0) {
795 inode->i_mode = le32_to_cpu(info->mode); 801 inode->i_mode = le32_to_cpu(info->mode);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 537048b4a4d5..1c9877c1149f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end,
100 } else 100 } else
101 info->inline_version = CEPH_INLINE_NONE; 101 info->inline_version = CEPH_INLINE_NONE;
102 102
103 if (features & CEPH_FEATURE_MDS_QUOTA) {
104 u8 struct_v, struct_compat;
105 u32 struct_len;
106
107 /*
108 * both struct_v and struct_compat are expected to be >= 1
109 */
110 ceph_decode_8_safe(p, end, struct_v, bad);
111 ceph_decode_8_safe(p, end, struct_compat, bad);
112 if (!struct_v || !struct_compat)
113 goto bad;
114 ceph_decode_32_safe(p, end, struct_len, bad);
115 ceph_decode_need(p, end, struct_len, bad);
116 ceph_decode_64_safe(p, end, info->max_bytes, bad);
117 ceph_decode_64_safe(p, end, info->max_files, bad);
118 } else {
119 info->max_bytes = 0;
120 info->max_files = 0;
121 }
122
103 info->pool_ns_len = 0; 123 info->pool_ns_len = 0;
104 info->pool_ns_data = NULL; 124 info->pool_ns_data = NULL;
105 if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { 125 if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
@@ -4082,6 +4102,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
4082 case CEPH_MSG_CLIENT_LEASE: 4102 case CEPH_MSG_CLIENT_LEASE:
4083 handle_lease(mdsc, s, msg); 4103 handle_lease(mdsc, s, msg);
4084 break; 4104 break;
4105 case CEPH_MSG_CLIENT_QUOTA:
4106 ceph_handle_quota(mdsc, s, msg);
4107 break;
4085 4108
4086 default: 4109 default:
4087 pr_err("received unknown message type %d %s\n", type, 4110 pr_err("received unknown message type %d %s\n", type,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 71e3b783ee6f..2a67c8b01ae6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in {
49 char *inline_data; 49 char *inline_data;
50 u32 pool_ns_len; 50 u32 pool_ns_len;
51 char *pool_ns_data; 51 char *pool_ns_data;
52 u64 max_bytes;
53 u64 max_files;
52}; 54};
53 55
54struct ceph_mds_reply_dir_entry { 56struct ceph_mds_reply_dir_entry {
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
new file mode 100644
index 000000000000..1b69d8365ec2
--- /dev/null
+++ b/fs/ceph/quota.c
@@ -0,0 +1,65 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * quota.c - CephFS quota
4 *
5 * Copyright (C) 2017-2018 SUSE
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include "super.h"
22#include "mds_client.h"
23
24void ceph_handle_quota(struct ceph_mds_client *mdsc,
25 struct ceph_mds_session *session,
26 struct ceph_msg *msg)
27{
28 struct super_block *sb = mdsc->fsc->sb;
29 struct ceph_mds_quota *h = msg->front.iov_base;
30 struct ceph_vino vino;
31 struct inode *inode;
32 struct ceph_inode_info *ci;
33
34 if (msg->front.iov_len != sizeof(*h)) {
35 pr_err("%s corrupt message mds%d len %d\n", __func__,
36 session->s_mds, (int)msg->front.iov_len);
37 ceph_msg_dump(msg);
38 return;
39 }
40
41 /* increment msg sequence number */
42 mutex_lock(&session->s_mutex);
43 session->s_seq++;
44 mutex_unlock(&session->s_mutex);
45
46 /* lookup inode */
47 vino.ino = le64_to_cpu(h->ino);
48 vino.snap = CEPH_NOSNAP;
49 inode = ceph_find_inode(sb, vino);
50 if (!inode) {
51 pr_warn("Failed to find inode %llu\n", vino.ino);
52 return;
53 }
54 ci = ceph_inode(inode);
55
56 spin_lock(&ci->i_ceph_lock);
57 ci->i_rbytes = le64_to_cpu(h->rbytes);
58 ci->i_rfiles = le64_to_cpu(h->rfiles);
59 ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
60 ci->i_max_bytes = le64_to_cpu(h->max_bytes);
61 ci->i_max_files = le64_to_cpu(h->max_files);
62 spin_unlock(&ci->i_ceph_lock);
63
64 iput(inode);
65}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ff49433014e9..0c95a929bab7 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -310,6 +310,9 @@ struct ceph_inode_info {
310 u64 i_rbytes, i_rfiles, i_rsubdirs; 310 u64 i_rbytes, i_rfiles, i_rsubdirs;
311 u64 i_files, i_subdirs; 311 u64 i_files, i_subdirs;
312 312
313 /* quotas */
314 u64 i_max_bytes, i_max_files;
315
313 struct rb_root i_fragtree; 316 struct rb_root i_fragtree;
314 int i_fragtree_nsplits; 317 int i_fragtree_nsplits;
315 struct mutex i_fragtree_mutex; 318 struct mutex i_fragtree_mutex;
@@ -1070,4 +1073,9 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
1070extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); 1073extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
1071extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); 1074extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
1072 1075
1076/* quota.c */
1077extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
1078 struct ceph_mds_session *session,
1079 struct ceph_msg *msg);
1080
1073#endif /* _FS_CEPH_SUPER_H */ 1081#endif /* _FS_CEPH_SUPER_H */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index e1c4e0b12b4c..7e72348639e4 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
224 (long)ci->i_rctime.tv_nsec); 224 (long)ci->i_rctime.tv_nsec);
225} 225}
226 226
227/* quotas */
228
229static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
230{
231 return (ci->i_max_files || ci->i_max_bytes);
232}
233
234static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
235 size_t size)
236{
237 return snprintf(val, size, "max_bytes=%llu max_files=%llu",
238 ci->i_max_bytes, ci->i_max_files);
239}
240
241static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
242 char *val, size_t size)
243{
244 return snprintf(val, size, "%llu", ci->i_max_bytes);
245}
246
247static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
248 char *val, size_t size)
249{
250 return snprintf(val, size, "%llu", ci->i_max_files);
251}
227 252
228#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name 253#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
229#define CEPH_XATTR_NAME2(_type, _name, _name2) \ 254#define CEPH_XATTR_NAME2(_type, _name, _name2) \
@@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
247 .hidden = true, \ 272 .hidden = true, \
248 .exists_cb = ceph_vxattrcb_layout_exists, \ 273 .exists_cb = ceph_vxattrcb_layout_exists, \
249 } 274 }
/*
 * Initializer for one quota virtual xattr table entry.  The name is
 * built by CEPH_XATTR_NAME(_type, _name), the getter is
 * ceph_vxattrcb_<_type>_<_name>, and the entry is marked hidden and
 * not read-only, with ceph_vxattrcb_quota_exists as its exists_cb.
 */
#define XATTR_QUOTA_FIELD(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
		.exists_cb = ceph_vxattrcb_quota_exists,		\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.hidden = true,						\
		.readonly = false,					\
	}
250 284
251static struct ceph_vxattr ceph_dir_vxattrs[] = { 285static struct ceph_vxattr ceph_dir_vxattrs[] = {
252 { 286 {
@@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
270 XATTR_NAME_CEPH(dir, rsubdirs), 304 XATTR_NAME_CEPH(dir, rsubdirs),
271 XATTR_NAME_CEPH(dir, rbytes), 305 XATTR_NAME_CEPH(dir, rbytes),
272 XATTR_NAME_CEPH(dir, rctime), 306 XATTR_NAME_CEPH(dir, rctime),
307 {
308 .name = "ceph.quota",
309 .name_size = sizeof("ceph.quota"),
310 .getxattr_cb = ceph_vxattrcb_quota,
311 .readonly = false,
312 .hidden = true,
313 .exists_cb = ceph_vxattrcb_quota_exists,
314 },
315 XATTR_QUOTA_FIELD(quota, max_bytes),
316 XATTR_QUOTA_FIELD(quota, max_files),
273 { .name = NULL, 0 } /* Required table terminator */ 317 { .name = NULL, 0 } /* Required table terminator */
274}; 318};
275static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ 319static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 59042d5ac520..3901927cf6a0 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
204 CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ 204 CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
205 CEPH_FEATURE_MSGR_KEEPALIVE2 | \ 205 CEPH_FEATURE_MSGR_KEEPALIVE2 | \
206 CEPH_FEATURE_OSD_POOLRESEND | \ 206 CEPH_FEATURE_OSD_POOLRESEND | \
207 CEPH_FEATURE_MDS_QUOTA | \
207 CEPH_FEATURE_CRUSH_V4 | \ 208 CEPH_FEATURE_CRUSH_V4 | \
208 CEPH_FEATURE_NEW_OSDOP_ENCODING | \ 209 CEPH_FEATURE_NEW_OSDOP_ENCODING | \
209 CEPH_FEATURE_SERVER_JEWEL | \ 210 CEPH_FEATURE_SERVER_JEWEL | \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 88dd51381aaf..7ecfc88314d8 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -134,6 +134,7 @@ struct ceph_dir_layout {
134#define CEPH_MSG_CLIENT_LEASE 0x311 134#define CEPH_MSG_CLIENT_LEASE 0x311
135#define CEPH_MSG_CLIENT_SNAP 0x312 135#define CEPH_MSG_CLIENT_SNAP 0x312
136#define CEPH_MSG_CLIENT_CAPRELEASE 0x313 136#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
137#define CEPH_MSG_CLIENT_QUOTA 0x314
137 138
138/* pool ops */ 139/* pool ops */
139#define CEPH_MSG_POOLOP_REPLY 48 140#define CEPH_MSG_POOLOP_REPLY 48
@@ -807,4 +808,20 @@ struct ceph_mds_snap_realm {
807} __attribute__ ((packed)); 808} __attribute__ ((packed));
808/* followed by my snap list, then prior parent snap list */ 809/* followed by my snap list, then prior parent snap list */
809 810
811/*
812 * quotas
813 */
814struct ceph_mds_quota {
815 __le64 ino; /* ino */
816 struct ceph_timespec rctime;
817 __le64 rbytes; /* dir stats */
818 __le64 rfiles;
819 __le64 rsubdirs;
820 __u8 struct_v; /* compat */
821 __u8 struct_compat;
822 __le32 struct_len;
823 __le64 max_bytes; /* quota max. bytes */
824 __le64 max_files; /* quota max. files */
825} __attribute__ ((packed));
826
810#endif 827#endif
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index c15e2699090c..ffbcc7f5e740 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -80,6 +80,7 @@ const char *ceph_msg_type_name(int type)
80 case CEPH_MSG_CLIENT_REPLY: return "client_reply"; 80 case CEPH_MSG_CLIENT_REPLY: return "client_reply";
81 case CEPH_MSG_CLIENT_CAPS: return "client_caps"; 81 case CEPH_MSG_CLIENT_CAPS: return "client_caps";
82 case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; 82 case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
83 case CEPH_MSG_CLIENT_QUOTA: return "client_quota";
83 case CEPH_MSG_CLIENT_SNAP: return "client_snap"; 84 case CEPH_MSG_CLIENT_SNAP: return "client_snap";
84 case CEPH_MSG_CLIENT_LEASE: return "client_lease"; 85 case CEPH_MSG_CLIENT_LEASE: return "client_lease";
85 case CEPH_MSG_POOLOP_REPLY: return "poolop_reply"; 86 case CEPH_MSG_POOLOP_REPLY: return "poolop_reply";