diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-12-14 14:02:15 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-12-14 14:02:15 -0500 |
commit | e97b71ded9ebe527681961c9e6f5ba99f6711974 (patch) | |
tree | a724e6f951456f1d855b55e3f34e3710b37f9f9a /fs | |
parent | 38971ce2fac484249d697fe48a9b0851a0b62572 (diff) | |
parent | 1cd275f609ba46c8cae3ee77e499c54a0d13a983 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: fix ioctl magic
ceph: Behave better when handling file lock replies.
ceph: pass lock information by struct file_lock instead of as individual params.
ceph: Handle file locks in replies from the MDS.
ceph: avoid possible null deref in readdir after dir llseek
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/dir.c | 4 | ||||
-rw-r--r-- | fs/ceph/ioctl.h | 2 | ||||
-rw-r--r-- | fs/ceph/locks.c | 94 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 41 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 31 |
5 files changed, 111 insertions, 61 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 7d447af84ec4..158c700fdca5 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -114,8 +114,8 @@ static int __dcache_readdir(struct file *filp, | |||
114 | spin_lock(&dcache_lock); | 114 | spin_lock(&dcache_lock); |
115 | 115 | ||
116 | /* start at beginning? */ | 116 | /* start at beginning? */ |
117 | if (filp->f_pos == 2 || (last && | 117 | if (filp->f_pos == 2 || last == NULL || |
118 | filp->f_pos < ceph_dentry(last)->offset)) { | 118 | filp->f_pos < ceph_dentry(last)->offset) { |
119 | if (list_empty(&parent->d_subdirs)) | 119 | if (list_empty(&parent->d_subdirs)) |
120 | goto out_unlock; | 120 | goto out_unlock; |
121 | p = parent->d_subdirs.prev; | 121 | p = parent->d_subdirs.prev; |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index a6ce54e94eb5..52e8fd74d450 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | 6 | ||
7 | #define CEPH_IOCTL_MAGIC 0x98 | 7 | #define CEPH_IOCTL_MAGIC 0x97 |
8 | 8 | ||
9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 40abde93c345..476b329867d4 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -11,40 +11,68 @@ | |||
11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
12 | */ | 12 | */ |
13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | 13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, |
14 | u64 pid, u64 pid_ns, | 14 | int cmd, u8 wait, struct file_lock *fl) |
15 | int cmd, u64 start, u64 length, u8 wait) | ||
16 | { | 15 | { |
17 | struct inode *inode = file->f_dentry->d_inode; | 16 | struct inode *inode = file->f_dentry->d_inode; |
18 | struct ceph_mds_client *mdsc = | 17 | struct ceph_mds_client *mdsc = |
19 | ceph_sb_to_client(inode->i_sb)->mdsc; | 18 | ceph_sb_to_client(inode->i_sb)->mdsc; |
20 | struct ceph_mds_request *req; | 19 | struct ceph_mds_request *req; |
21 | int err; | 20 | int err; |
21 | u64 length = 0; | ||
22 | 22 | ||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | 23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); |
24 | if (IS_ERR(req)) | 24 | if (IS_ERR(req)) |
25 | return PTR_ERR(req); | 25 | return PTR_ERR(req); |
26 | req->r_inode = igrab(inode); | 26 | req->r_inode = igrab(inode); |
27 | 27 | ||
28 | /* mds requires start and length rather than start and end */ | ||
29 | if (LLONG_MAX == fl->fl_end) | ||
30 | length = 0; | ||
31 | else | ||
32 | length = fl->fl_end - fl->fl_start + 1; | ||
33 | |||
28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 34 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | 35 | "length: %llu, wait: %d, type`: %d", (int)lock_type, |
30 | (int)operation, pid, start, length, wait, cmd); | 36 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
37 | length, wait, fl->fl_type); | ||
38 | |||
31 | 39 | ||
32 | req->r_args.filelock_change.rule = lock_type; | 40 | req->r_args.filelock_change.rule = lock_type; |
33 | req->r_args.filelock_change.type = cmd; | 41 | req->r_args.filelock_change.type = cmd; |
34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | 42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
35 | /* This should be adjusted, but I'm not sure if | 43 | /* This should be adjusted, but I'm not sure if |
36 | namespaces actually get id numbers*/ | 44 | namespaces actually get id numbers*/ |
37 | req->r_args.filelock_change.pid_namespace = | 45 | req->r_args.filelock_change.pid_namespace = |
38 | cpu_to_le64((u64)pid_ns); | 46 | cpu_to_le64((u64)(unsigned long)fl->fl_nspid); |
39 | req->r_args.filelock_change.start = cpu_to_le64(start); | 47 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
40 | req->r_args.filelock_change.length = cpu_to_le64(length); | 48 | req->r_args.filelock_change.length = cpu_to_le64(length); |
41 | req->r_args.filelock_change.wait = wait; | 49 | req->r_args.filelock_change.wait = wait; |
42 | 50 | ||
43 | err = ceph_mdsc_do_request(mdsc, inode, req); | 51 | err = ceph_mdsc_do_request(mdsc, inode, req); |
52 | |||
53 | if ( operation == CEPH_MDS_OP_GETFILELOCK){ | ||
54 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); | ||
55 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | ||
56 | fl->fl_type = F_RDLCK; | ||
57 | else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) | ||
58 | fl->fl_type = F_WRLCK; | ||
59 | else | ||
60 | fl->fl_type = F_UNLCK; | ||
61 | |||
62 | fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); | ||
63 | length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + | ||
64 | le64_to_cpu(req->r_reply_info.filelock_reply->length); | ||
65 | if (length >= 1) | ||
66 | fl->fl_end = length -1; | ||
67 | else | ||
68 | fl->fl_end = 0; | ||
69 | |||
70 | } | ||
44 | ceph_mdsc_put_request(req); | 71 | ceph_mdsc_put_request(req); |
45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 72 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | 73 | "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, |
47 | (int)operation, pid, start, length, wait, cmd, err); | 74 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
75 | length, wait, fl->fl_type, err); | ||
48 | return err; | 76 | return err; |
49 | } | 77 | } |
50 | 78 | ||
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
54 | */ | 82 | */ |
55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | 83 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) |
56 | { | 84 | { |
57 | u64 length; | ||
58 | u8 lock_cmd; | 85 | u8 lock_cmd; |
59 | int err; | 86 | int err; |
60 | u8 wait = 0; | 87 | u8 wait = 0; |
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
76 | else | 103 | else |
77 | lock_cmd = CEPH_LOCK_UNLOCK; | 104 | lock_cmd = CEPH_LOCK_UNLOCK; |
78 | 105 | ||
79 | if (LLONG_MAX == fl->fl_end) | 106 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
80 | length = 0; | ||
81 | else | ||
82 | length = fl->fl_end - fl->fl_start + 1; | ||
83 | |||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
85 | (u64)fl->fl_pid, | ||
86 | (u64)(unsigned long)fl->fl_nspid, | ||
87 | lock_cmd, fl->fl_start, | ||
88 | length, wait); | ||
89 | if (!err) { | 107 | if (!err) { |
90 | dout("mds locked, locking locally"); | 108 | if ( op != CEPH_MDS_OP_GETFILELOCK ){ |
91 | err = posix_lock_file(file, fl, NULL); | 109 | dout("mds locked, locking locally"); |
92 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 110 | err = posix_lock_file(file, fl, NULL); |
93 | /* undo! This should only happen if the kernel detects | 111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { |
94 | * local deadlock. */ | 112 | /* undo! This should only happen if the kernel detects |
95 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 113 | * local deadlock. */ |
96 | (u64)fl->fl_pid, | 114 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
97 | (u64)(unsigned long)fl->fl_nspid, | 115 | CEPH_LOCK_UNLOCK, 0, fl); |
98 | CEPH_LOCK_UNLOCK, fl->fl_start, | 116 | dout("got %d on posix_lock_file, undid lock", err); |
99 | length, 0); | 117 | } |
100 | dout("got %d on posix_lock_file, undid lock", err); | ||
101 | } | 118 | } |
119 | |||
102 | } else { | 120 | } else { |
103 | dout("mds returned error code %d", err); | 121 | dout("mds returned error code %d", err); |
104 | } | 122 | } |
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
107 | 125 | ||
108 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | 126 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) |
109 | { | 127 | { |
110 | u64 length; | ||
111 | u8 lock_cmd; | 128 | u8 lock_cmd; |
112 | int err; | 129 | int err; |
113 | u8 wait = 1; | 130 | u8 wait = 1; |
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
127 | lock_cmd = CEPH_LOCK_EXCL; | 144 | lock_cmd = CEPH_LOCK_EXCL; |
128 | else | 145 | else |
129 | lock_cmd = CEPH_LOCK_UNLOCK; | 146 | lock_cmd = CEPH_LOCK_UNLOCK; |
130 | /* mds requires start and length rather than start and end */ | ||
131 | if (LLONG_MAX == fl->fl_end) | ||
132 | length = 0; | ||
133 | else | ||
134 | length = fl->fl_end - fl->fl_start + 1; | ||
135 | 147 | ||
136 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 148 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
137 | file, (u64)fl->fl_pid, | 149 | file, lock_cmd, wait, fl); |
138 | (u64)(unsigned long)fl->fl_nspid, | ||
139 | lock_cmd, fl->fl_start, | ||
140 | length, wait); | ||
141 | if (!err) { | 150 | if (!err) { |
142 | err = flock_lock_file_wait(file, fl); | 151 | err = flock_lock_file_wait(file, fl); |
143 | if (err) { | 152 | if (err) { |
144 | ceph_lock_message(CEPH_LOCK_FLOCK, | 153 | ceph_lock_message(CEPH_LOCK_FLOCK, |
145 | CEPH_MDS_OP_SETFILELOCK, | 154 | CEPH_MDS_OP_SETFILELOCK, |
146 | file, (u64)fl->fl_pid, | 155 | file, CEPH_LOCK_UNLOCK, 0, fl); |
147 | (u64)(unsigned long)fl->fl_nspid, | ||
148 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
149 | length, 0); | ||
150 | dout("got %d on flock_lock_file_wait, undid lock", err); | 156 | dout("got %d on flock_lock_file_wait, undid lock", err); |
151 | } | 157 | } |
152 | } else { | 158 | } else { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 098b18508479..38800eaa81d0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -202,6 +202,38 @@ out_bad: | |||
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * parse fcntl F_GETLK results | ||
206 | */ | ||
207 | static int parse_reply_info_filelock(void **p, void *end, | ||
208 | struct ceph_mds_reply_info_parsed *info) | ||
209 | { | ||
210 | if (*p + sizeof(*info->filelock_reply) > end) | ||
211 | goto bad; | ||
212 | |||
213 | info->filelock_reply = *p; | ||
214 | *p += sizeof(*info->filelock_reply); | ||
215 | |||
216 | if (unlikely(*p != end)) | ||
217 | goto bad; | ||
218 | return 0; | ||
219 | |||
220 | bad: | ||
221 | return -EIO; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * parse extra results | ||
226 | */ | ||
227 | static int parse_reply_info_extra(void **p, void *end, | ||
228 | struct ceph_mds_reply_info_parsed *info) | ||
229 | { | ||
230 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) | ||
231 | return parse_reply_info_filelock(p, end, info); | ||
232 | else | ||
233 | return parse_reply_info_dir(p, end, info); | ||
234 | } | ||
235 | |||
236 | /* | ||
205 | * parse entire mds reply | 237 | * parse entire mds reply |
206 | */ | 238 | */ |
207 | static int parse_reply_info(struct ceph_msg *msg, | 239 | static int parse_reply_info(struct ceph_msg *msg, |
@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
223 | goto out_bad; | 255 | goto out_bad; |
224 | } | 256 | } |
225 | 257 | ||
226 | /* dir content */ | 258 | /* extra */ |
227 | ceph_decode_32_safe(&p, end, len, bad); | 259 | ceph_decode_32_safe(&p, end, len, bad); |
228 | if (len > 0) { | 260 | if (len > 0) { |
229 | err = parse_reply_info_dir(&p, p+len, info); | 261 | err = parse_reply_info_extra(&p, p+len, info); |
230 | if (err < 0) | 262 | if (err < 0) |
231 | goto out_bad; | 263 | goto out_bad; |
232 | } | 264 | } |
@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2074 | 2106 | ||
2075 | mutex_lock(&session->s_mutex); | 2107 | mutex_lock(&session->s_mutex); |
2076 | if (err < 0) { | 2108 | if (err < 0) { |
2077 | pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); | 2109 | pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); |
2078 | ceph_msg_dump(msg); | 2110 | ceph_msg_dump(msg); |
2079 | goto out_err; | 2111 | goto out_err; |
2080 | } | 2112 | } |
@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2094 | mutex_lock(&req->r_fill_mutex); | 2126 | mutex_lock(&req->r_fill_mutex); |
2095 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); | 2127 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
2096 | if (err == 0) { | 2128 | if (err == 0) { |
2097 | if (result == 0 && rinfo->dir_nr) | 2129 | if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && |
2130 | rinfo->dir_nr) | ||
2098 | ceph_readdir_prepopulate(req, req->r_session); | 2131 | ceph_readdir_prepopulate(req, req->r_session); |
2099 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); | 2132 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
2100 | } | 2133 | } |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 9341fd4f1432..aabe563b54db 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in { | |||
42 | }; | 42 | }; |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * parsed info about an mds reply, including information about the | 45 | * parsed info about an mds reply, including information about |
46 | * target inode and/or its parent directory and dentry, and directory | 46 | * either: 1) the target inode and/or its parent directory and dentry, |
47 | * contents (for readdir results). | 47 | * and directory contents (for readdir results), or |
48 | * 2) the file range lock info (for fcntl F_GETLK results). | ||
48 | */ | 49 | */ |
49 | struct ceph_mds_reply_info_parsed { | 50 | struct ceph_mds_reply_info_parsed { |
50 | struct ceph_mds_reply_head *head; | 51 | struct ceph_mds_reply_head *head; |
51 | 52 | ||
53 | /* trace */ | ||
52 | struct ceph_mds_reply_info_in diri, targeti; | 54 | struct ceph_mds_reply_info_in diri, targeti; |
53 | struct ceph_mds_reply_dirfrag *dirfrag; | 55 | struct ceph_mds_reply_dirfrag *dirfrag; |
54 | char *dname; | 56 | char *dname; |
55 | u32 dname_len; | 57 | u32 dname_len; |
56 | struct ceph_mds_reply_lease *dlease; | 58 | struct ceph_mds_reply_lease *dlease; |
57 | 59 | ||
58 | struct ceph_mds_reply_dirfrag *dir_dir; | 60 | /* extra */ |
59 | int dir_nr; | 61 | union { |
60 | char **dir_dname; | 62 | /* for fcntl F_GETLK results */ |
61 | u32 *dir_dname_len; | 63 | struct ceph_filelock *filelock_reply; |
62 | struct ceph_mds_reply_lease **dir_dlease; | 64 | |
63 | struct ceph_mds_reply_info_in *dir_in; | 65 | /* for readdir results */ |
64 | u8 dir_complete, dir_end; | 66 | struct { |
67 | struct ceph_mds_reply_dirfrag *dir_dir; | ||
68 | int dir_nr; | ||
69 | char **dir_dname; | ||
70 | u32 *dir_dname_len; | ||
71 | struct ceph_mds_reply_lease **dir_dlease; | ||
72 | struct ceph_mds_reply_info_in *dir_in; | ||
73 | u8 dir_complete, dir_end; | ||
74 | }; | ||
75 | }; | ||
65 | 76 | ||
66 | /* encoded blob describing snapshot contexts for certain | 77 | /* encoded blob describing snapshot contexts for certain |
67 | operations (e.g., open) */ | 78 | operations (e.g., open) */ |