aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-12-14 14:02:15 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-12-14 14:02:15 -0500
commite97b71ded9ebe527681961c9e6f5ba99f6711974 (patch)
treea724e6f951456f1d855b55e3f34e3710b37f9f9a /fs
parent38971ce2fac484249d697fe48a9b0851a0b62572 (diff)
parent1cd275f609ba46c8cae3ee77e499c54a0d13a983 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: ceph: fix ioctl magic ceph: Behave better when handling file lock replies. ceph: pass lock information by struct file_lock instead of as individual params. ceph: Handle file locks in replies from the MDS. ceph: avoid possible null deref in readdir after dir llseek
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/ioctl.h2
-rw-r--r--fs/ceph/locks.c94
-rw-r--r--fs/ceph/mds_client.c41
-rw-r--r--fs/ceph/mds_client.h31
5 files changed, 111 insertions, 61 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7d447af84ec4..158c700fdca5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -114,8 +114,8 @@ static int __dcache_readdir(struct file *filp,
114 spin_lock(&dcache_lock); 114 spin_lock(&dcache_lock);
115 115
116 /* start at beginning? */ 116 /* start at beginning? */
117 if (filp->f_pos == 2 || (last && 117 if (filp->f_pos == 2 || last == NULL ||
118 filp->f_pos < ceph_dentry(last)->offset)) { 118 filp->f_pos < ceph_dentry(last)->offset) {
119 if (list_empty(&parent->d_subdirs)) 119 if (list_empty(&parent->d_subdirs))
120 goto out_unlock; 120 goto out_unlock;
121 p = parent->d_subdirs.prev; 121 p = parent->d_subdirs.prev;
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e94eb5..52e8fd74d450 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
4#include <linux/ioctl.h> 4#include <linux/ioctl.h>
5#include <linux/types.h> 5#include <linux/types.h>
6 6
7#define CEPH_IOCTL_MAGIC 0x98 7#define CEPH_IOCTL_MAGIC 0x97
8 8
9/* just use u64 to align sanely on all archs */ 9/* just use u64 to align sanely on all archs */
10struct ceph_ioctl_layout { 10struct ceph_ioctl_layout {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde93c345..476b329867d4 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
11 * Implement fcntl and flock locking functions. 11 * Implement fcntl and flock locking functions.
12 */ 12 */
13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 u64 pid, u64 pid_ns, 14 int cmd, u8 wait, struct file_lock *fl)
15 int cmd, u64 start, u64 length, u8 wait)
16{ 15{
17 struct inode *inode = file->f_dentry->d_inode; 16 struct inode *inode = file->f_dentry->d_inode;
18 struct ceph_mds_client *mdsc = 17 struct ceph_mds_client *mdsc =
19 ceph_sb_to_client(inode->i_sb)->mdsc; 18 ceph_sb_to_client(inode->i_sb)->mdsc;
20 struct ceph_mds_request *req; 19 struct ceph_mds_request *req;
21 int err; 20 int err;
21 u64 length = 0;
22 22
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 24 if (IS_ERR(req))
25 return PTR_ERR(req); 25 return PTR_ERR(req);
26 req->r_inode = igrab(inode); 26 req->r_inode = igrab(inode);
27 27
28 /* mds requires start and length rather than start and end */
29 if (LLONG_MAX == fl->fl_end)
30 length = 0;
31 else
32 length = fl->fl_end - fl->fl_start + 1;
33
28 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
29 "length: %llu, wait: %d, type`: %d", (int)lock_type, 35 "length: %llu, wait: %d, type`: %d", (int)lock_type,
30 (int)operation, pid, start, length, wait, cmd); 36 (int)operation, (u64)fl->fl_pid, fl->fl_start,
37 length, wait, fl->fl_type);
38
31 39
32 req->r_args.filelock_change.rule = lock_type; 40 req->r_args.filelock_change.rule = lock_type;
33 req->r_args.filelock_change.type = cmd; 41 req->r_args.filelock_change.type = cmd;
34 req->r_args.filelock_change.pid = cpu_to_le64(pid); 42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
35 /* This should be adjusted, but I'm not sure if 43 /* This should be adjusted, but I'm not sure if
36 namespaces actually get id numbers*/ 44 namespaces actually get id numbers*/
37 req->r_args.filelock_change.pid_namespace = 45 req->r_args.filelock_change.pid_namespace =
38 cpu_to_le64((u64)pid_ns); 46 cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
39 req->r_args.filelock_change.start = cpu_to_le64(start); 47 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
40 req->r_args.filelock_change.length = cpu_to_le64(length); 48 req->r_args.filelock_change.length = cpu_to_le64(length);
41 req->r_args.filelock_change.wait = wait; 49 req->r_args.filelock_change.wait = wait;
42 50
43 err = ceph_mdsc_do_request(mdsc, inode, req); 51 err = ceph_mdsc_do_request(mdsc, inode, req);
52
53 if ( operation == CEPH_MDS_OP_GETFILELOCK){
54 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
55 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
56 fl->fl_type = F_RDLCK;
57 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
58 fl->fl_type = F_WRLCK;
59 else
60 fl->fl_type = F_UNLCK;
61
62 fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
63 length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
64 le64_to_cpu(req->r_reply_info.filelock_reply->length);
65 if (length >= 1)
66 fl->fl_end = length -1;
67 else
68 fl->fl_end = 0;
69
70 }
44 ceph_mdsc_put_request(req); 71 ceph_mdsc_put_request(req);
45 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
46 "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, 73 "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
47 (int)operation, pid, start, length, wait, cmd, err); 74 (int)operation, (u64)fl->fl_pid, fl->fl_start,
75 length, wait, fl->fl_type, err);
48 return err; 76 return err;
49} 77}
50 78
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
54 */ 82 */
55int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 83int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
56{ 84{
57 u64 length;
58 u8 lock_cmd; 85 u8 lock_cmd;
59 int err; 86 int err;
60 u8 wait = 0; 87 u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
76 else 103 else
77 lock_cmd = CEPH_LOCK_UNLOCK; 104 lock_cmd = CEPH_LOCK_UNLOCK;
78 105
79 if (LLONG_MAX == fl->fl_end) 106 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
80 length = 0;
81 else
82 length = fl->fl_end - fl->fl_start + 1;
83
84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
85 (u64)fl->fl_pid,
86 (u64)(unsigned long)fl->fl_nspid,
87 lock_cmd, fl->fl_start,
88 length, wait);
89 if (!err) { 107 if (!err) {
90 dout("mds locked, locking locally"); 108 if ( op != CEPH_MDS_OP_GETFILELOCK ){
91 err = posix_lock_file(file, fl, NULL); 109 dout("mds locked, locking locally");
92 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 110 err = posix_lock_file(file, fl, NULL);
93 /* undo! This should only happen if the kernel detects 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
94 * local deadlock. */ 112 /* undo! This should only happen if the kernel detects
95 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 113 * local deadlock. */
96 (u64)fl->fl_pid, 114 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
97 (u64)(unsigned long)fl->fl_nspid, 115 CEPH_LOCK_UNLOCK, 0, fl);
98 CEPH_LOCK_UNLOCK, fl->fl_start, 116 dout("got %d on posix_lock_file, undid lock", err);
99 length, 0); 117 }
100 dout("got %d on posix_lock_file, undid lock", err);
101 } 118 }
119
102 } else { 120 } else {
103 dout("mds returned error code %d", err); 121 dout("mds returned error code %d", err);
104 } 122 }
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
107 125
108int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 126int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
109{ 127{
110 u64 length;
111 u8 lock_cmd; 128 u8 lock_cmd;
112 int err; 129 int err;
113 u8 wait = 1; 130 u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
127 lock_cmd = CEPH_LOCK_EXCL; 144 lock_cmd = CEPH_LOCK_EXCL;
128 else 145 else
129 lock_cmd = CEPH_LOCK_UNLOCK; 146 lock_cmd = CEPH_LOCK_UNLOCK;
130 /* mds requires start and length rather than start and end */
131 if (LLONG_MAX == fl->fl_end)
132 length = 0;
133 else
134 length = fl->fl_end - fl->fl_start + 1;
135 147
136 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 148 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
137 file, (u64)fl->fl_pid, 149 file, lock_cmd, wait, fl);
138 (u64)(unsigned long)fl->fl_nspid,
139 lock_cmd, fl->fl_start,
140 length, wait);
141 if (!err) { 150 if (!err) {
142 err = flock_lock_file_wait(file, fl); 151 err = flock_lock_file_wait(file, fl);
143 if (err) { 152 if (err) {
144 ceph_lock_message(CEPH_LOCK_FLOCK, 153 ceph_lock_message(CEPH_LOCK_FLOCK,
145 CEPH_MDS_OP_SETFILELOCK, 154 CEPH_MDS_OP_SETFILELOCK,
146 file, (u64)fl->fl_pid, 155 file, CEPH_LOCK_UNLOCK, 0, fl);
147 (u64)(unsigned long)fl->fl_nspid,
148 CEPH_LOCK_UNLOCK, fl->fl_start,
149 length, 0);
150 dout("got %d on flock_lock_file_wait, undid lock", err); 156 dout("got %d on flock_lock_file_wait, undid lock", err);
151 } 157 }
152 } else { 158 } else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 098b18508479..38800eaa81d0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -202,6 +202,38 @@ out_bad:
202} 202}
203 203
204/* 204/*
205 * parse fcntl F_GETLK results
206 */
207static int parse_reply_info_filelock(void **p, void *end,
208 struct ceph_mds_reply_info_parsed *info)
209{
210 if (*p + sizeof(*info->filelock_reply) > end)
211 goto bad;
212
213 info->filelock_reply = *p;
214 *p += sizeof(*info->filelock_reply);
215
216 if (unlikely(*p != end))
217 goto bad;
218 return 0;
219
220bad:
221 return -EIO;
222}
223
224/*
225 * parse extra results
226 */
227static int parse_reply_info_extra(void **p, void *end,
228 struct ceph_mds_reply_info_parsed *info)
229{
230 if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
231 return parse_reply_info_filelock(p, end, info);
232 else
233 return parse_reply_info_dir(p, end, info);
234}
235
236/*
205 * parse entire mds reply 237 * parse entire mds reply
206 */ 238 */
207static int parse_reply_info(struct ceph_msg *msg, 239static int parse_reply_info(struct ceph_msg *msg,
@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
223 goto out_bad; 255 goto out_bad;
224 } 256 }
225 257
226 /* dir content */ 258 /* extra */
227 ceph_decode_32_safe(&p, end, len, bad); 259 ceph_decode_32_safe(&p, end, len, bad);
228 if (len > 0) { 260 if (len > 0) {
229 err = parse_reply_info_dir(&p, p+len, info); 261 err = parse_reply_info_extra(&p, p+len, info);
230 if (err < 0) 262 if (err < 0)
231 goto out_bad; 263 goto out_bad;
232 } 264 }
@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2074 2106
2075 mutex_lock(&session->s_mutex); 2107 mutex_lock(&session->s_mutex);
2076 if (err < 0) { 2108 if (err < 0) {
2077 pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); 2109 pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
2078 ceph_msg_dump(msg); 2110 ceph_msg_dump(msg);
2079 goto out_err; 2111 goto out_err;
2080 } 2112 }
@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2094 mutex_lock(&req->r_fill_mutex); 2126 mutex_lock(&req->r_fill_mutex);
2095 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2127 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2096 if (err == 0) { 2128 if (err == 0) {
2097 if (result == 0 && rinfo->dir_nr) 2129 if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
2130 rinfo->dir_nr)
2098 ceph_readdir_prepopulate(req, req->r_session); 2131 ceph_readdir_prepopulate(req, req->r_session);
2099 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2132 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2100 } 2133 }
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 9341fd4f1432..aabe563b54db 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
42}; 42};
43 43
44/* 44/*
45 * parsed info about an mds reply, including information about the 45 * parsed info about an mds reply, including information about
46 * target inode and/or its parent directory and dentry, and directory 46 * either: 1) the target inode and/or its parent directory and dentry,
47 * contents (for readdir results). 47 * and directory contents (for readdir results), or
48 * 2) the file range lock info (for fcntl F_GETLK results).
48 */ 49 */
49struct ceph_mds_reply_info_parsed { 50struct ceph_mds_reply_info_parsed {
50 struct ceph_mds_reply_head *head; 51 struct ceph_mds_reply_head *head;
51 52
53 /* trace */
52 struct ceph_mds_reply_info_in diri, targeti; 54 struct ceph_mds_reply_info_in diri, targeti;
53 struct ceph_mds_reply_dirfrag *dirfrag; 55 struct ceph_mds_reply_dirfrag *dirfrag;
54 char *dname; 56 char *dname;
55 u32 dname_len; 57 u32 dname_len;
56 struct ceph_mds_reply_lease *dlease; 58 struct ceph_mds_reply_lease *dlease;
57 59
58 struct ceph_mds_reply_dirfrag *dir_dir; 60 /* extra */
59 int dir_nr; 61 union {
60 char **dir_dname; 62 /* for fcntl F_GETLK results */
61 u32 *dir_dname_len; 63 struct ceph_filelock *filelock_reply;
62 struct ceph_mds_reply_lease **dir_dlease; 64
63 struct ceph_mds_reply_info_in *dir_in; 65 /* for readdir results */
64 u8 dir_complete, dir_end; 66 struct {
67 struct ceph_mds_reply_dirfrag *dir_dir;
68 int dir_nr;
69 char **dir_dname;
70 u32 *dir_dname_len;
71 struct ceph_mds_reply_lease **dir_dlease;
72 struct ceph_mds_reply_info_in *dir_in;
73 u8 dir_complete, dir_end;
74 };
75 };
65 76
66 /* encoded blob describing snapshot contexts for certain 77 /* encoded blob describing snapshot contexts for certain
67 operations (e.g., open) */ 78 operations (e.g., open) */