diff options
39 files changed, 1162 insertions, 410 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 6a660e610be8..278e1172600d 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),) | |||
6 | 6 | ||
7 | obj-$(CONFIG_CEPH_FS) += ceph.o | 7 | obj-$(CONFIG_CEPH_FS) += ceph.o |
8 | 8 | ||
9 | ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | messenger.o msgpool.o buffer.o pagelist.o \ |
12 | mds_client.o mdsmap.o \ | 12 | mds_client.o mdsmap.o \ |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d9c60b84949a..5598a0d02295 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -309,7 +309,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
310 | } | 310 | } |
311 | 311 | ||
312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, |
313 | GFP_NOFS)) { | ||
313 | page_cache_release(page); | 314 | page_cache_release(page); |
314 | dout("readpages %p add_to_page_cache failed %p\n", | 315 | dout("readpages %p add_to_page_cache failed %p\n", |
315 | inode, page); | 316 | inode, page); |
@@ -552,7 +553,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
552 | * page truncation thread, possibly losing some data that | 553 | * page truncation thread, possibly losing some data that |
553 | * raced its way in | 554 | * raced its way in |
554 | */ | 555 | */ |
555 | if ((issued & CEPH_CAP_FILE_CACHE) == 0) | 556 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
556 | generic_error_remove_page(inode->i_mapping, page); | 557 | generic_error_remove_page(inode->i_mapping, page); |
557 | 558 | ||
558 | unlock_page(page); | 559 | unlock_page(page); |
@@ -797,9 +798,12 @@ get_more_pages: | |||
797 | dout("%p will write page %p idx %lu\n", | 798 | dout("%p will write page %p idx %lu\n", |
798 | inode, page, page->index); | 799 | inode, page, page->index); |
799 | 800 | ||
800 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 801 | writeback_stat = |
801 | if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) { | 802 | atomic_long_inc_return(&client->writeback_count); |
802 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 803 | if (writeback_stat > CONGESTION_ON_THRESH( |
804 | client->mount_args->congestion_kb)) { | ||
805 | set_bdi_congested(&client->backing_dev_info, | ||
806 | BLK_RW_ASYNC); | ||
803 | } | 807 | } |
804 | 808 | ||
805 | set_page_writeback(page); | 809 | set_page_writeback(page); |
@@ -1036,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1036 | *pagep = page; | 1040 | *pagep = page; |
1037 | 1041 | ||
1038 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | 1042 | dout("write_begin file %p inode %p page %p %d~%d\n", file, |
1039 | inode, page, (int)pos, (int)len); | 1043 | inode, page, (int)pos, (int)len); |
1040 | 1044 | ||
1041 | r = ceph_update_writeable_page(file, pos, len, page); | 1045 | r = ceph_update_writeable_page(file, pos, len, page); |
1042 | } while (r == -EAGAIN); | 1046 | } while (r == -EAGAIN); |
diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c index 67b2c030924b..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/fs/ceph/armor.c | |||
@@ -1,11 +1,15 @@ | |||
1 | 1 | ||
2 | #include <linux/errno.h> | 2 | #include <linux/errno.h> |
3 | 3 | ||
4 | int ceph_armor(char *dst, const char *src, const char *end); | ||
5 | int ceph_unarmor(char *dst, const char *src, const char *end); | ||
6 | |||
4 | /* | 7 | /* |
5 | * base64 encode/decode. | 8 | * base64 encode/decode. |
6 | */ | 9 | */ |
7 | 10 | ||
8 | const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | 11 | static const char *pem_key = |
12 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||
9 | 13 | ||
10 | static int encode_bits(int c) | 14 | static int encode_bits(int c) |
11 | { | 15 | { |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 89490beaf537..6d2e30600627 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -20,7 +20,7 @@ static u32 supported_protocols[] = { | |||
20 | CEPH_AUTH_CEPHX | 20 | CEPH_AUTH_CEPHX |
21 | }; | 21 | }; |
22 | 22 | ||
23 | int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) | 23 | static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) |
24 | { | 24 | { |
25 | switch (protocol) { | 25 | switch (protocol) { |
26 | case CEPH_AUTH_NONE: | 26 | case CEPH_AUTH_NONE: |
@@ -133,8 +133,8 @@ bad: | |||
133 | return -ERANGE; | 133 | return -ERANGE; |
134 | } | 134 | } |
135 | 135 | ||
136 | int ceph_build_auth_request(struct ceph_auth_client *ac, | 136 | static int ceph_build_auth_request(struct ceph_auth_client *ac, |
137 | void *msg_buf, size_t msg_len) | 137 | void *msg_buf, size_t msg_len) |
138 | { | 138 | { |
139 | struct ceph_mon_request_header *monhdr = msg_buf; | 139 | struct ceph_mon_request_header *monhdr = msg_buf; |
140 | void *p = monhdr + 1; | 140 | void *p = monhdr + 1; |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 6d44053ecff1..582e0b2caf8a 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -87,8 +87,8 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, | |||
87 | /* | 87 | /* |
88 | * get existing (or insert new) ticket handler | 88 | * get existing (or insert new) ticket handler |
89 | */ | 89 | */ |
90 | struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac, | 90 | static struct ceph_x_ticket_handler * |
91 | int service) | 91 | get_ticket_handler(struct ceph_auth_client *ac, int service) |
92 | { | 92 | { |
93 | struct ceph_x_ticket_handler *th; | 93 | struct ceph_x_ticket_handler *th; |
94 | struct ceph_x_info *xi = ac->private; | 94 | struct ceph_x_info *xi = ac->private; |
@@ -429,7 +429,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
429 | auth->struct_v = 1; | 429 | auth->struct_v = 1; |
430 | auth->key = 0; | 430 | auth->key = 0; |
431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) | 431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) |
432 | auth->key ^= *u; | 432 | auth->key ^= *(__le64 *)u; |
433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", | 433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", |
434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), | 434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), |
435 | le64_to_cpu(auth->key)); | 435 | le64_to_cpu(auth->key)); |
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c index c67535d70aa6..cd39f17021de 100644 --- a/fs/ceph/buffer.c +++ b/fs/ceph/buffer.c | |||
@@ -47,22 +47,6 @@ void ceph_buffer_release(struct kref *kref) | |||
47 | kfree(b); | 47 | kfree(b); |
48 | } | 48 | } |
49 | 49 | ||
50 | int ceph_buffer_alloc(struct ceph_buffer *b, int len, gfp_t gfp) | ||
51 | { | ||
52 | b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN); | ||
53 | if (b->vec.iov_base) { | ||
54 | b->is_vmalloc = false; | ||
55 | } else { | ||
56 | b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); | ||
57 | b->is_vmalloc = true; | ||
58 | } | ||
59 | if (!b->vec.iov_base) | ||
60 | return -ENOMEM; | ||
61 | b->alloc_len = len; | ||
62 | b->vec.iov_len = len; | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
67 | { | 51 | { |
68 | size_t len; | 52 | size_t len; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b81be9a56487..7bf182b03973 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -113,58 +113,41 @@ const char *ceph_cap_string(int caps) | |||
113 | return cap_str[i]; | 113 | return cap_str[i]; |
114 | } | 114 | } |
115 | 115 | ||
116 | /* | 116 | void ceph_caps_init(struct ceph_mds_client *mdsc) |
117 | * Cap reservations | ||
118 | * | ||
119 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
120 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
121 | * memory needed to successfully process an MDS response. (If an MDS | ||
122 | * sends us cap information and we fail to process it, we will have | ||
123 | * problems due to the client and MDS being out of sync.) | ||
124 | * | ||
125 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
126 | */ | ||
127 | static spinlock_t caps_list_lock; | ||
128 | static struct list_head caps_list; /* unused (reserved or unreserved) */ | ||
129 | static int caps_total_count; /* total caps allocated */ | ||
130 | static int caps_use_count; /* in use */ | ||
131 | static int caps_reserve_count; /* unused, reserved */ | ||
132 | static int caps_avail_count; /* unused, unreserved */ | ||
133 | static int caps_min_count; /* keep at least this many (unreserved) */ | ||
134 | |||
135 | void __init ceph_caps_init(void) | ||
136 | { | 117 | { |
137 | INIT_LIST_HEAD(&caps_list); | 118 | INIT_LIST_HEAD(&mdsc->caps_list); |
138 | spin_lock_init(&caps_list_lock); | 119 | spin_lock_init(&mdsc->caps_list_lock); |
139 | } | 120 | } |
140 | 121 | ||
141 | void ceph_caps_finalize(void) | 122 | void ceph_caps_finalize(struct ceph_mds_client *mdsc) |
142 | { | 123 | { |
143 | struct ceph_cap *cap; | 124 | struct ceph_cap *cap; |
144 | 125 | ||
145 | spin_lock(&caps_list_lock); | 126 | spin_lock(&mdsc->caps_list_lock); |
146 | while (!list_empty(&caps_list)) { | 127 | while (!list_empty(&mdsc->caps_list)) { |
147 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 128 | cap = list_first_entry(&mdsc->caps_list, |
129 | struct ceph_cap, caps_item); | ||
148 | list_del(&cap->caps_item); | 130 | list_del(&cap->caps_item); |
149 | kmem_cache_free(ceph_cap_cachep, cap); | 131 | kmem_cache_free(ceph_cap_cachep, cap); |
150 | } | 132 | } |
151 | caps_total_count = 0; | 133 | mdsc->caps_total_count = 0; |
152 | caps_avail_count = 0; | 134 | mdsc->caps_avail_count = 0; |
153 | caps_use_count = 0; | 135 | mdsc->caps_use_count = 0; |
154 | caps_reserve_count = 0; | 136 | mdsc->caps_reserve_count = 0; |
155 | caps_min_count = 0; | 137 | mdsc->caps_min_count = 0; |
156 | spin_unlock(&caps_list_lock); | 138 | spin_unlock(&mdsc->caps_list_lock); |
157 | } | 139 | } |
158 | 140 | ||
159 | void ceph_adjust_min_caps(int delta) | 141 | void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) |
160 | { | 142 | { |
161 | spin_lock(&caps_list_lock); | 143 | spin_lock(&mdsc->caps_list_lock); |
162 | caps_min_count += delta; | 144 | mdsc->caps_min_count += delta; |
163 | BUG_ON(caps_min_count < 0); | 145 | BUG_ON(mdsc->caps_min_count < 0); |
164 | spin_unlock(&caps_list_lock); | 146 | spin_unlock(&mdsc->caps_list_lock); |
165 | } | 147 | } |
166 | 148 | ||
167 | int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | 149 | int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
150 | struct ceph_cap_reservation *ctx, int need) | ||
168 | { | 151 | { |
169 | int i; | 152 | int i; |
170 | struct ceph_cap *cap; | 153 | struct ceph_cap *cap; |
@@ -176,16 +159,17 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
176 | dout("reserve caps ctx=%p need=%d\n", ctx, need); | 159 | dout("reserve caps ctx=%p need=%d\n", ctx, need); |
177 | 160 | ||
178 | /* first reserve any caps that are already allocated */ | 161 | /* first reserve any caps that are already allocated */ |
179 | spin_lock(&caps_list_lock); | 162 | spin_lock(&mdsc->caps_list_lock); |
180 | if (caps_avail_count >= need) | 163 | if (mdsc->caps_avail_count >= need) |
181 | have = need; | 164 | have = need; |
182 | else | 165 | else |
183 | have = caps_avail_count; | 166 | have = mdsc->caps_avail_count; |
184 | caps_avail_count -= have; | 167 | mdsc->caps_avail_count -= have; |
185 | caps_reserve_count += have; | 168 | mdsc->caps_reserve_count += have; |
186 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 169 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
187 | caps_avail_count); | 170 | mdsc->caps_reserve_count + |
188 | spin_unlock(&caps_list_lock); | 171 | mdsc->caps_avail_count); |
172 | spin_unlock(&mdsc->caps_list_lock); | ||
189 | 173 | ||
190 | for (i = have; i < need; i++) { | 174 | for (i = have; i < need; i++) { |
191 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 175 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
@@ -198,19 +182,20 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
198 | } | 182 | } |
199 | BUG_ON(have + alloc != need); | 183 | BUG_ON(have + alloc != need); |
200 | 184 | ||
201 | spin_lock(&caps_list_lock); | 185 | spin_lock(&mdsc->caps_list_lock); |
202 | caps_total_count += alloc; | 186 | mdsc->caps_total_count += alloc; |
203 | caps_reserve_count += alloc; | 187 | mdsc->caps_reserve_count += alloc; |
204 | list_splice(&newcaps, &caps_list); | 188 | list_splice(&newcaps, &mdsc->caps_list); |
205 | 189 | ||
206 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 190 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
207 | caps_avail_count); | 191 | mdsc->caps_reserve_count + |
208 | spin_unlock(&caps_list_lock); | 192 | mdsc->caps_avail_count); |
193 | spin_unlock(&mdsc->caps_list_lock); | ||
209 | 194 | ||
210 | ctx->count = need; | 195 | ctx->count = need; |
211 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", | 196 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", |
212 | ctx, caps_total_count, caps_use_count, caps_reserve_count, | 197 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, |
213 | caps_avail_count); | 198 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
214 | return 0; | 199 | return 0; |
215 | 200 | ||
216 | out_alloc_count: | 201 | out_alloc_count: |
@@ -220,26 +205,29 @@ out_alloc_count: | |||
220 | return ret; | 205 | return ret; |
221 | } | 206 | } |
222 | 207 | ||
223 | int ceph_unreserve_caps(struct ceph_cap_reservation *ctx) | 208 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
209 | struct ceph_cap_reservation *ctx) | ||
224 | { | 210 | { |
225 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); | 211 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); |
226 | if (ctx->count) { | 212 | if (ctx->count) { |
227 | spin_lock(&caps_list_lock); | 213 | spin_lock(&mdsc->caps_list_lock); |
228 | BUG_ON(caps_reserve_count < ctx->count); | 214 | BUG_ON(mdsc->caps_reserve_count < ctx->count); |
229 | caps_reserve_count -= ctx->count; | 215 | mdsc->caps_reserve_count -= ctx->count; |
230 | caps_avail_count += ctx->count; | 216 | mdsc->caps_avail_count += ctx->count; |
231 | ctx->count = 0; | 217 | ctx->count = 0; |
232 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", | 218 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", |
233 | caps_total_count, caps_use_count, caps_reserve_count, | 219 | mdsc->caps_total_count, mdsc->caps_use_count, |
234 | caps_avail_count); | 220 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
235 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 221 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
236 | caps_avail_count); | 222 | mdsc->caps_reserve_count + |
237 | spin_unlock(&caps_list_lock); | 223 | mdsc->caps_avail_count); |
224 | spin_unlock(&mdsc->caps_list_lock); | ||
238 | } | 225 | } |
239 | return 0; | 226 | return 0; |
240 | } | 227 | } |
241 | 228 | ||
242 | static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | 229 | static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, |
230 | struct ceph_cap_reservation *ctx) | ||
243 | { | 231 | { |
244 | struct ceph_cap *cap = NULL; | 232 | struct ceph_cap *cap = NULL; |
245 | 233 | ||
@@ -247,71 +235,74 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | |||
247 | if (!ctx) { | 235 | if (!ctx) { |
248 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 236 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
249 | if (cap) { | 237 | if (cap) { |
250 | caps_use_count++; | 238 | mdsc->caps_use_count++; |
251 | caps_total_count++; | 239 | mdsc->caps_total_count++; |
252 | } | 240 | } |
253 | return cap; | 241 | return cap; |
254 | } | 242 | } |
255 | 243 | ||
256 | spin_lock(&caps_list_lock); | 244 | spin_lock(&mdsc->caps_list_lock); |
257 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", | 245 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", |
258 | ctx, ctx->count, caps_total_count, caps_use_count, | 246 | ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, |
259 | caps_reserve_count, caps_avail_count); | 247 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
260 | BUG_ON(!ctx->count); | 248 | BUG_ON(!ctx->count); |
261 | BUG_ON(ctx->count > caps_reserve_count); | 249 | BUG_ON(ctx->count > mdsc->caps_reserve_count); |
262 | BUG_ON(list_empty(&caps_list)); | 250 | BUG_ON(list_empty(&mdsc->caps_list)); |
263 | 251 | ||
264 | ctx->count--; | 252 | ctx->count--; |
265 | caps_reserve_count--; | 253 | mdsc->caps_reserve_count--; |
266 | caps_use_count++; | 254 | mdsc->caps_use_count++; |
267 | 255 | ||
268 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 256 | cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); |
269 | list_del(&cap->caps_item); | 257 | list_del(&cap->caps_item); |
270 | 258 | ||
271 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 259 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
272 | caps_avail_count); | 260 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
273 | spin_unlock(&caps_list_lock); | 261 | spin_unlock(&mdsc->caps_list_lock); |
274 | return cap; | 262 | return cap; |
275 | } | 263 | } |
276 | 264 | ||
277 | void ceph_put_cap(struct ceph_cap *cap) | 265 | void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) |
278 | { | 266 | { |
279 | spin_lock(&caps_list_lock); | 267 | spin_lock(&mdsc->caps_list_lock); |
280 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", | 268 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", |
281 | cap, caps_total_count, caps_use_count, | 269 | cap, mdsc->caps_total_count, mdsc->caps_use_count, |
282 | caps_reserve_count, caps_avail_count); | 270 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
283 | caps_use_count--; | 271 | mdsc->caps_use_count--; |
284 | /* | 272 | /* |
285 | * Keep some preallocated caps around (ceph_min_count), to | 273 | * Keep some preallocated caps around (ceph_min_count), to |
286 | * avoid lots of free/alloc churn. | 274 | * avoid lots of free/alloc churn. |
287 | */ | 275 | */ |
288 | if (caps_avail_count >= caps_reserve_count + caps_min_count) { | 276 | if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + |
289 | caps_total_count--; | 277 | mdsc->caps_min_count) { |
278 | mdsc->caps_total_count--; | ||
290 | kmem_cache_free(ceph_cap_cachep, cap); | 279 | kmem_cache_free(ceph_cap_cachep, cap); |
291 | } else { | 280 | } else { |
292 | caps_avail_count++; | 281 | mdsc->caps_avail_count++; |
293 | list_add(&cap->caps_item, &caps_list); | 282 | list_add(&cap->caps_item, &mdsc->caps_list); |
294 | } | 283 | } |
295 | 284 | ||
296 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 285 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
297 | caps_avail_count); | 286 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
298 | spin_unlock(&caps_list_lock); | 287 | spin_unlock(&mdsc->caps_list_lock); |
299 | } | 288 | } |
300 | 289 | ||
301 | void ceph_reservation_status(struct ceph_client *client, | 290 | void ceph_reservation_status(struct ceph_client *client, |
302 | int *total, int *avail, int *used, int *reserved, | 291 | int *total, int *avail, int *used, int *reserved, |
303 | int *min) | 292 | int *min) |
304 | { | 293 | { |
294 | struct ceph_mds_client *mdsc = &client->mdsc; | ||
295 | |||
305 | if (total) | 296 | if (total) |
306 | *total = caps_total_count; | 297 | *total = mdsc->caps_total_count; |
307 | if (avail) | 298 | if (avail) |
308 | *avail = caps_avail_count; | 299 | *avail = mdsc->caps_avail_count; |
309 | if (used) | 300 | if (used) |
310 | *used = caps_use_count; | 301 | *used = mdsc->caps_use_count; |
311 | if (reserved) | 302 | if (reserved) |
312 | *reserved = caps_reserve_count; | 303 | *reserved = mdsc->caps_reserve_count; |
313 | if (min) | 304 | if (min) |
314 | *min = caps_min_count; | 305 | *min = mdsc->caps_min_count; |
315 | } | 306 | } |
316 | 307 | ||
317 | /* | 308 | /* |
@@ -336,22 +327,29 @@ static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) | |||
336 | return NULL; | 327 | return NULL; |
337 | } | 328 | } |
338 | 329 | ||
330 | struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) | ||
331 | { | ||
332 | struct ceph_cap *cap; | ||
333 | |||
334 | spin_lock(&ci->vfs_inode.i_lock); | ||
335 | cap = __get_cap_for_mds(ci, mds); | ||
336 | spin_unlock(&ci->vfs_inode.i_lock); | ||
337 | return cap; | ||
338 | } | ||
339 | |||
339 | /* | 340 | /* |
340 | * Return id of any MDS with a cap, preferably FILE_WR|WRBUFFER|EXCL, else | 341 | * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1. |
341 | * -1. | ||
342 | */ | 342 | */ |
343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci, u32 *mseq) | 343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci) |
344 | { | 344 | { |
345 | struct ceph_cap *cap; | 345 | struct ceph_cap *cap; |
346 | int mds = -1; | 346 | int mds = -1; |
347 | struct rb_node *p; | 347 | struct rb_node *p; |
348 | 348 | ||
349 | /* prefer mds with WR|WRBUFFER|EXCL caps */ | 349 | /* prefer mds with WR|BUFFER|EXCL caps */ |
350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { |
351 | cap = rb_entry(p, struct ceph_cap, ci_node); | 351 | cap = rb_entry(p, struct ceph_cap, ci_node); |
352 | mds = cap->mds; | 352 | mds = cap->mds; |
353 | if (mseq) | ||
354 | *mseq = cap->mseq; | ||
355 | if (cap->issued & (CEPH_CAP_FILE_WR | | 353 | if (cap->issued & (CEPH_CAP_FILE_WR | |
356 | CEPH_CAP_FILE_BUFFER | | 354 | CEPH_CAP_FILE_BUFFER | |
357 | CEPH_CAP_FILE_EXCL)) | 355 | CEPH_CAP_FILE_EXCL)) |
@@ -364,7 +362,7 @@ int ceph_get_cap_mds(struct inode *inode) | |||
364 | { | 362 | { |
365 | int mds; | 363 | int mds; |
366 | spin_lock(&inode->i_lock); | 364 | spin_lock(&inode->i_lock); |
367 | mds = __ceph_get_cap_mds(ceph_inode(inode), NULL); | 365 | mds = __ceph_get_cap_mds(ceph_inode(inode)); |
368 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
369 | return mds; | 367 | return mds; |
370 | } | 368 | } |
@@ -483,8 +481,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
483 | * Each time we receive FILE_CACHE anew, we increment | 481 | * Each time we receive FILE_CACHE anew, we increment |
484 | * i_rdcache_gen. | 482 | * i_rdcache_gen. |
485 | */ | 483 | */ |
486 | if ((issued & CEPH_CAP_FILE_CACHE) && | 484 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && |
487 | (had & CEPH_CAP_FILE_CACHE) == 0) | 485 | (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
488 | ci->i_rdcache_gen++; | 486 | ci->i_rdcache_gen++; |
489 | 487 | ||
490 | /* | 488 | /* |
@@ -543,7 +541,7 @@ retry: | |||
543 | new_cap = NULL; | 541 | new_cap = NULL; |
544 | } else { | 542 | } else { |
545 | spin_unlock(&inode->i_lock); | 543 | spin_unlock(&inode->i_lock); |
546 | new_cap = get_cap(caps_reservation); | 544 | new_cap = get_cap(mdsc, caps_reservation); |
547 | if (new_cap == NULL) | 545 | if (new_cap == NULL) |
548 | return -ENOMEM; | 546 | return -ENOMEM; |
549 | goto retry; | 547 | goto retry; |
@@ -588,6 +586,7 @@ retry: | |||
588 | } else { | 586 | } else { |
589 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", | 587 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", |
590 | realmino); | 588 | realmino); |
589 | WARN_ON(!realm); | ||
591 | } | 590 | } |
592 | } | 591 | } |
593 | 592 | ||
@@ -831,7 +830,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) | |||
831 | { | 830 | { |
832 | int want = 0; | 831 | int want = 0; |
833 | int mode; | 832 | int mode; |
834 | for (mode = 0; mode < 4; mode++) | 833 | for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++) |
835 | if (ci->i_nr_by_mode[mode]) | 834 | if (ci->i_nr_by_mode[mode]) |
836 | want |= ceph_caps_for_mode(mode); | 835 | want |= ceph_caps_for_mode(mode); |
837 | return want; | 836 | return want; |
@@ -901,7 +900,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
901 | ci->i_auth_cap = NULL; | 900 | ci->i_auth_cap = NULL; |
902 | 901 | ||
903 | if (removed) | 902 | if (removed) |
904 | ceph_put_cap(cap); | 903 | ceph_put_cap(mdsc, cap); |
905 | 904 | ||
906 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 905 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
907 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 906 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
@@ -1197,6 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1197 | */ | 1196 | */ |
1198 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1197 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1199 | struct ceph_mds_session **psession) | 1198 | struct ceph_mds_session **psession) |
1199 | __releases(ci->vfs_inode->i_lock) | ||
1200 | __acquires(ci->vfs_inode->i_lock) | ||
1200 | { | 1201 | { |
1201 | struct inode *inode = &ci->vfs_inode; | 1202 | struct inode *inode = &ci->vfs_inode; |
1202 | int mds; | 1203 | int mds; |
@@ -1232,7 +1233,13 @@ retry: | |||
1232 | BUG_ON(capsnap->dirty == 0); | 1233 | BUG_ON(capsnap->dirty == 0); |
1233 | 1234 | ||
1234 | /* pick mds, take s_mutex */ | 1235 | /* pick mds, take s_mutex */ |
1235 | mds = __ceph_get_cap_mds(ci, &mseq); | 1236 | if (ci->i_auth_cap == NULL) { |
1237 | dout("no auth cap (migrating?), doing nothing\n"); | ||
1238 | goto out; | ||
1239 | } | ||
1240 | mds = ci->i_auth_cap->session->s_mds; | ||
1241 | mseq = ci->i_auth_cap->mseq; | ||
1242 | |||
1236 | if (session && session->s_mds != mds) { | 1243 | if (session && session->s_mds != mds) { |
1237 | dout("oops, wrong session %p mutex\n", session); | 1244 | dout("oops, wrong session %p mutex\n", session); |
1238 | mutex_unlock(&session->s_mutex); | 1245 | mutex_unlock(&session->s_mutex); |
@@ -1251,8 +1258,8 @@ retry: | |||
1251 | } | 1258 | } |
1252 | /* | 1259 | /* |
1253 | * if session == NULL, we raced against a cap | 1260 | * if session == NULL, we raced against a cap |
1254 | * deletion. retry, and we'll get a better | 1261 | * deletion or migration. retry, and we'll |
1255 | * @mds value next time. | 1262 | * get a better @mds value next time. |
1256 | */ | 1263 | */ |
1257 | spin_lock(&inode->i_lock); | 1264 | spin_lock(&inode->i_lock); |
1258 | goto retry; | 1265 | goto retry; |
@@ -1290,6 +1297,7 @@ retry: | |||
1290 | list_del_init(&ci->i_snap_flush_item); | 1297 | list_del_init(&ci->i_snap_flush_item); |
1291 | spin_unlock(&mdsc->snap_flush_lock); | 1298 | spin_unlock(&mdsc->snap_flush_lock); |
1292 | 1299 | ||
1300 | out: | ||
1293 | if (psession) | 1301 | if (psession) |
1294 | *psession = session; | 1302 | *psession = session; |
1295 | else if (session) { | 1303 | else if (session) { |
@@ -1435,7 +1443,6 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1435 | */ | 1443 | */ |
1436 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1444 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1437 | struct ceph_mds_session *session) | 1445 | struct ceph_mds_session *session) |
1438 | __releases(session->s_mutex) | ||
1439 | { | 1446 | { |
1440 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1447 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
1441 | struct ceph_mds_client *mdsc = &client->mdsc; | 1448 | struct ceph_mds_client *mdsc = &client->mdsc; |
@@ -1510,11 +1517,13 @@ retry_locked: | |||
1510 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1517 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
1511 | ci->i_rdcache_gen && /* may have cached pages */ | 1518 | ci->i_rdcache_gen && /* may have cached pages */ |
1512 | (file_wanted == 0 || /* no open files */ | 1519 | (file_wanted == 0 || /* no open files */ |
1513 | (revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */ | 1520 | (revoking & (CEPH_CAP_FILE_CACHE| |
1521 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | ||
1514 | !tried_invalidate) { | 1522 | !tried_invalidate) { |
1515 | dout("check_caps trying to invalidate on %p\n", inode); | 1523 | dout("check_caps trying to invalidate on %p\n", inode); |
1516 | if (try_nonblocking_invalidate(inode) < 0) { | 1524 | if (try_nonblocking_invalidate(inode) < 0) { |
1517 | if (revoking & CEPH_CAP_FILE_CACHE) { | 1525 | if (revoking & (CEPH_CAP_FILE_CACHE| |
1526 | CEPH_CAP_FILE_LAZYIO)) { | ||
1518 | dout("check_caps queuing invalidate\n"); | 1527 | dout("check_caps queuing invalidate\n"); |
1519 | queue_invalidate = 1; | 1528 | queue_invalidate = 1; |
1520 | ci->i_rdcache_revoking = ci->i_rdcache_gen; | 1529 | ci->i_rdcache_revoking = ci->i_rdcache_gen; |
@@ -2250,8 +2259,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2250 | struct ceph_mds_session *session, | 2259 | struct ceph_mds_session *session, |
2251 | struct ceph_cap *cap, | 2260 | struct ceph_cap *cap, |
2252 | struct ceph_buffer *xattr_buf) | 2261 | struct ceph_buffer *xattr_buf) |
2253 | __releases(inode->i_lock) | 2262 | __releases(inode->i_lock) |
2254 | __releases(session->s_mutex) | ||
2255 | { | 2263 | { |
2256 | struct ceph_inode_info *ci = ceph_inode(inode); | 2264 | struct ceph_inode_info *ci = ceph_inode(inode); |
2257 | int mds = session->s_mds; | 2265 | int mds = session->s_mds; |
@@ -2278,6 +2286,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2278 | * will invalidate _after_ writeback.) | 2286 | * will invalidate _after_ writeback.) |
2279 | */ | 2287 | */ |
2280 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && | 2288 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && |
2289 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
2281 | !ci->i_wrbuffer_ref) { | 2290 | !ci->i_wrbuffer_ref) { |
2282 | if (try_nonblocking_invalidate(inode) == 0) { | 2291 | if (try_nonblocking_invalidate(inode) == 0) { |
2283 | revoked_rdcache = 1; | 2292 | revoked_rdcache = 1; |
@@ -2369,15 +2378,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2369 | 2378 | ||
2370 | /* revocation, grant, or no-op? */ | 2379 | /* revocation, grant, or no-op? */ |
2371 | if (cap->issued & ~newcaps) { | 2380 | if (cap->issued & ~newcaps) { |
2372 | dout("revocation: %s -> %s\n", ceph_cap_string(cap->issued), | 2381 | int revoking = cap->issued & ~newcaps; |
2373 | ceph_cap_string(newcaps)); | 2382 | |
2374 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2383 | dout("revocation: %s -> %s (revoking %s)\n", |
2375 | writeback = 1; /* will delay ack */ | 2384 | ceph_cap_string(cap->issued), |
2376 | else if (dirty & ~newcaps) | 2385 | ceph_cap_string(newcaps), |
2377 | check_caps = 1; /* initiate writeback in check_caps */ | 2386 | ceph_cap_string(revoking)); |
2378 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2387 | if (revoking & used & CEPH_CAP_FILE_BUFFER) |
2379 | revoked_rdcache) | 2388 | writeback = 1; /* initiate writeback; will delay ack */ |
2380 | check_caps = 2; /* send revoke ack in check_caps */ | 2389 | else if (revoking == CEPH_CAP_FILE_CACHE && |
2390 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
2391 | queue_invalidate) | ||
2392 | ; /* do nothing yet, invalidation will be queued */ | ||
2393 | else if (cap == ci->i_auth_cap) | ||
2394 | check_caps = 1; /* check auth cap only */ | ||
2395 | else | ||
2396 | check_caps = 2; /* check all caps */ | ||
2381 | cap->issued = newcaps; | 2397 | cap->issued = newcaps; |
2382 | cap->implemented |= newcaps; | 2398 | cap->implemented |= newcaps; |
2383 | } else if (cap->issued == newcaps) { | 2399 | } else if (cap->issued == newcaps) { |
@@ -2568,7 +2584,8 @@ static void handle_cap_trunc(struct inode *inode, | |||
2568 | * caller holds s_mutex | 2584 | * caller holds s_mutex |
2569 | */ | 2585 | */ |
2570 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | 2586 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, |
2571 | struct ceph_mds_session *session) | 2587 | struct ceph_mds_session *session, |
2588 | int *open_target_sessions) | ||
2572 | { | 2589 | { |
2573 | struct ceph_inode_info *ci = ceph_inode(inode); | 2590 | struct ceph_inode_info *ci = ceph_inode(inode); |
2574 | int mds = session->s_mds; | 2591 | int mds = session->s_mds; |
@@ -2600,6 +2617,12 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2600 | ci->i_cap_exporting_mds = mds; | 2617 | ci->i_cap_exporting_mds = mds; |
2601 | ci->i_cap_exporting_mseq = mseq; | 2618 | ci->i_cap_exporting_mseq = mseq; |
2602 | ci->i_cap_exporting_issued = cap->issued; | 2619 | ci->i_cap_exporting_issued = cap->issued; |
2620 | |||
2621 | /* | ||
2622 | * make sure we have open sessions with all possible | ||
2623 | * export targets, so that we get the matching IMPORT | ||
2624 | */ | ||
2625 | *open_target_sessions = 1; | ||
2603 | } | 2626 | } |
2604 | __ceph_remove_cap(cap); | 2627 | __ceph_remove_cap(cap); |
2605 | } | 2628 | } |
@@ -2675,6 +2698,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2675 | u64 size, max_size; | 2698 | u64 size, max_size; |
2676 | u64 tid; | 2699 | u64 tid; |
2677 | void *snaptrace; | 2700 | void *snaptrace; |
2701 | size_t snaptrace_len; | ||
2702 | void *flock; | ||
2703 | u32 flock_len; | ||
2704 | int open_target_sessions = 0; | ||
2678 | 2705 | ||
2679 | dout("handle_caps from mds%d\n", mds); | 2706 | dout("handle_caps from mds%d\n", mds); |
2680 | 2707 | ||
@@ -2683,7 +2710,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2683 | if (msg->front.iov_len < sizeof(*h)) | 2710 | if (msg->front.iov_len < sizeof(*h)) |
2684 | goto bad; | 2711 | goto bad; |
2685 | h = msg->front.iov_base; | 2712 | h = msg->front.iov_base; |
2686 | snaptrace = h + 1; | ||
2687 | op = le32_to_cpu(h->op); | 2713 | op = le32_to_cpu(h->op); |
2688 | vino.ino = le64_to_cpu(h->ino); | 2714 | vino.ino = le64_to_cpu(h->ino); |
2689 | vino.snap = CEPH_NOSNAP; | 2715 | vino.snap = CEPH_NOSNAP; |
@@ -2693,6 +2719,21 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2693 | size = le64_to_cpu(h->size); | 2719 | size = le64_to_cpu(h->size); |
2694 | max_size = le64_to_cpu(h->max_size); | 2720 | max_size = le64_to_cpu(h->max_size); |
2695 | 2721 | ||
2722 | snaptrace = h + 1; | ||
2723 | snaptrace_len = le32_to_cpu(h->snap_trace_len); | ||
2724 | |||
2725 | if (le16_to_cpu(msg->hdr.version) >= 2) { | ||
2726 | void *p, *end; | ||
2727 | |||
2728 | p = snaptrace + snaptrace_len; | ||
2729 | end = msg->front.iov_base + msg->front.iov_len; | ||
2730 | ceph_decode_32_safe(&p, end, flock_len, bad); | ||
2731 | flock = p; | ||
2732 | } else { | ||
2733 | flock = NULL; | ||
2734 | flock_len = 0; | ||
2735 | } | ||
2736 | |||
2696 | mutex_lock(&session->s_mutex); | 2737 | mutex_lock(&session->s_mutex); |
2697 | session->s_seq++; | 2738 | session->s_seq++; |
2698 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, | 2739 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, |
@@ -2714,7 +2755,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2714 | * along for the mds (who clearly thinks we still have this | 2755 | * along for the mds (who clearly thinks we still have this |
2715 | * cap). | 2756 | * cap). |
2716 | */ | 2757 | */ |
2717 | ceph_add_cap_releases(mdsc, session, -1); | 2758 | ceph_add_cap_releases(mdsc, session); |
2718 | ceph_send_cap_releases(mdsc, session); | 2759 | ceph_send_cap_releases(mdsc, session); |
2719 | goto done; | 2760 | goto done; |
2720 | } | 2761 | } |
@@ -2726,12 +2767,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2726 | goto done; | 2767 | goto done; |
2727 | 2768 | ||
2728 | case CEPH_CAP_OP_EXPORT: | 2769 | case CEPH_CAP_OP_EXPORT: |
2729 | handle_cap_export(inode, h, session); | 2770 | handle_cap_export(inode, h, session, &open_target_sessions); |
2730 | goto done; | 2771 | goto done; |
2731 | 2772 | ||
2732 | case CEPH_CAP_OP_IMPORT: | 2773 | case CEPH_CAP_OP_IMPORT: |
2733 | handle_cap_import(mdsc, inode, h, session, | 2774 | handle_cap_import(mdsc, inode, h, session, |
2734 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2775 | snaptrace, snaptrace_len); |
2735 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, | 2776 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
2736 | session); | 2777 | session); |
2737 | goto done_unlocked; | 2778 | goto done_unlocked; |
@@ -2773,6 +2814,8 @@ done: | |||
2773 | done_unlocked: | 2814 | done_unlocked: |
2774 | if (inode) | 2815 | if (inode) |
2775 | iput(inode); | 2816 | iput(inode); |
2817 | if (open_target_sessions) | ||
2818 | ceph_mdsc_open_export_target_sessions(mdsc, session); | ||
2776 | return; | 2819 | return; |
2777 | 2820 | ||
2778 | bad: | 2821 | bad: |
diff --git a/fs/ceph/ceph_frag.h b/fs/ceph/ceph_frag.h index 793f50cb7c22..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/fs/ceph/ceph_frag.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _FS_CEPH_FRAG_H | 1 | #ifndef FS_CEPH_FRAG_H |
2 | #define _FS_CEPH_FRAG_H | 2 | #define FS_CEPH_FRAG_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * "Frags" are a way to describe a subset of a 32-bit number space, | 5 | * "Frags" are a way to describe a subset of a 32-bit number space, |
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index 79d76bc4303f..3ac6cc7c1156 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c | |||
@@ -29,46 +29,44 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout) | |||
29 | 29 | ||
30 | int ceph_flags_to_mode(int flags) | 30 | int ceph_flags_to_mode(int flags) |
31 | { | 31 | { |
32 | int mode; | ||
33 | |||
32 | #ifdef O_DIRECTORY /* fixme */ | 34 | #ifdef O_DIRECTORY /* fixme */ |
33 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | 35 | if ((flags & O_DIRECTORY) == O_DIRECTORY) |
34 | return CEPH_FILE_MODE_PIN; | 36 | return CEPH_FILE_MODE_PIN; |
35 | #endif | 37 | #endif |
38 | if ((flags & O_APPEND) == O_APPEND) | ||
39 | flags |= O_WRONLY; | ||
40 | |||
41 | if ((flags & O_ACCMODE) == O_RDWR) | ||
42 | mode = CEPH_FILE_MODE_RDWR; | ||
43 | else if ((flags & O_ACCMODE) == O_WRONLY) | ||
44 | mode = CEPH_FILE_MODE_WR; | ||
45 | else | ||
46 | mode = CEPH_FILE_MODE_RD; | ||
47 | |||
36 | #ifdef O_LAZY | 48 | #ifdef O_LAZY |
37 | if (flags & O_LAZY) | 49 | if (flags & O_LAZY) |
38 | return CEPH_FILE_MODE_LAZY; | 50 | mode |= CEPH_FILE_MODE_LAZY; |
39 | #endif | 51 | #endif |
40 | if ((flags & O_APPEND) == O_APPEND) | ||
41 | flags |= O_WRONLY; | ||
42 | 52 | ||
43 | flags &= O_ACCMODE; | 53 | return mode; |
44 | if ((flags & O_RDWR) == O_RDWR) | ||
45 | return CEPH_FILE_MODE_RDWR; | ||
46 | if ((flags & O_WRONLY) == O_WRONLY) | ||
47 | return CEPH_FILE_MODE_WR; | ||
48 | return CEPH_FILE_MODE_RD; | ||
49 | } | 54 | } |
50 | 55 | ||
51 | int ceph_caps_for_mode(int mode) | 56 | int ceph_caps_for_mode(int mode) |
52 | { | 57 | { |
53 | switch (mode) { | 58 | int caps = CEPH_CAP_PIN; |
54 | case CEPH_FILE_MODE_PIN: | 59 | |
55 | return CEPH_CAP_PIN; | 60 | if (mode & CEPH_FILE_MODE_RD) |
56 | case CEPH_FILE_MODE_RD: | 61 | caps |= CEPH_CAP_FILE_SHARED | |
57 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
58 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | 62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; |
59 | case CEPH_FILE_MODE_RDWR: | 63 | if (mode & CEPH_FILE_MODE_WR) |
60 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | 64 | caps |= CEPH_CAP_FILE_EXCL | |
61 | CEPH_CAP_FILE_EXCL | | ||
62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | | ||
63 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
64 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
65 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
66 | case CEPH_FILE_MODE_WR: | ||
67 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
68 | CEPH_CAP_FILE_EXCL | | ||
69 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | 65 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | |
70 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | 66 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | |
71 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | 67 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; |
72 | } | 68 | if (mode & CEPH_FILE_MODE_LAZY) |
73 | return 0; | 69 | caps |= CEPH_CAP_FILE_LAZYIO; |
70 | |||
71 | return caps; | ||
74 | } | 72 | } |
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 2fa992eaf7da..d5619ac86711 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -9,27 +9,13 @@ | |||
9 | * LGPL2 | 9 | * LGPL2 |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifndef _FS_CEPH_CEPH_FS_H | 12 | #ifndef CEPH_FS_H |
13 | #define _FS_CEPH_CEPH_FS_H | 13 | #define CEPH_FS_H |
14 | 14 | ||
15 | #include "msgr.h" | 15 | #include "msgr.h" |
16 | #include "rados.h" | 16 | #include "rados.h" |
17 | 17 | ||
18 | /* | 18 | /* |
19 | * Ceph release version | ||
20 | */ | ||
21 | #define CEPH_VERSION_MAJOR 0 | ||
22 | #define CEPH_VERSION_MINOR 20 | ||
23 | #define CEPH_VERSION_PATCH 0 | ||
24 | |||
25 | #define _CEPH_STRINGIFY(x) #x | ||
26 | #define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x) | ||
27 | #define CEPH_MAKE_VERSION(x, y, z) CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) \ | ||
28 | "." CEPH_STRINGIFY(z) | ||
29 | #define CEPH_VERSION CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, \ | ||
30 | CEPH_VERSION_MINOR, CEPH_VERSION_PATCH) | ||
31 | |||
32 | /* | ||
33 | * subprotocol versions. when specific messages types or high-level | 19 | * subprotocol versions. when specific messages types or high-level |
34 | * protocols change, bump the affected components. we keep rev | 20 | * protocols change, bump the affected components. we keep rev |
35 | * internal cluster protocols separately from the public, | 21 | * internal cluster protocols separately from the public, |
@@ -53,18 +39,10 @@ | |||
53 | /* | 39 | /* |
54 | * feature bits | 40 | * feature bits |
55 | */ | 41 | */ |
56 | #define CEPH_FEATURE_UID 1 | 42 | #define CEPH_FEATURE_UID (1<<0) |
57 | #define CEPH_FEATURE_NOSRCADDR 2 | 43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) |
58 | #define CEPH_FEATURE_FLOCK 4 | 44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) |
59 | 45 | #define CEPH_FEATURE_FLOCK (1<<3) | |
60 | #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
62 | #define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK | ||
63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
64 | #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
68 | 46 | ||
69 | 47 | ||
70 | /* | 48 | /* |
@@ -96,6 +74,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
96 | #define CEPH_CRYPTO_NONE 0x0 | 74 | #define CEPH_CRYPTO_NONE 0x0 |
97 | #define CEPH_CRYPTO_AES 0x1 | 75 | #define CEPH_CRYPTO_AES 0x1 |
98 | 76 | ||
77 | #define CEPH_AES_IV "cephsageyudagreg" | ||
78 | |||
99 | /* security/authentication protocols */ | 79 | /* security/authentication protocols */ |
100 | #define CEPH_AUTH_UNKNOWN 0x0 | 80 | #define CEPH_AUTH_UNKNOWN 0x0 |
101 | #define CEPH_AUTH_NONE 0x1 | 81 | #define CEPH_AUTH_NONE 0x1 |
@@ -275,6 +255,7 @@ extern const char *ceph_mds_state_name(int s); | |||
275 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ | 255 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ |
276 | #define CEPH_LOCK_INEST 1024 /* mds internal */ | 256 | #define CEPH_LOCK_INEST 1024 /* mds internal */ |
277 | #define CEPH_LOCK_IXATTR 2048 | 257 | #define CEPH_LOCK_IXATTR 2048 |
258 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ | ||
278 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | 259 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ |
279 | 260 | ||
280 | /* client_session ops */ | 261 | /* client_session ops */ |
@@ -316,6 +297,8 @@ enum { | |||
316 | CEPH_MDS_OP_RMXATTR = 0x01106, | 297 | CEPH_MDS_OP_RMXATTR = 0x01106, |
317 | CEPH_MDS_OP_SETLAYOUT = 0x01107, | 298 | CEPH_MDS_OP_SETLAYOUT = 0x01107, |
318 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | ||
301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | ||
319 | 302 | ||
320 | CEPH_MDS_OP_MKNOD = 0x01201, | 303 | CEPH_MDS_OP_MKNOD = 0x01201, |
321 | CEPH_MDS_OP_LINK = 0x01202, | 304 | CEPH_MDS_OP_LINK = 0x01202, |
@@ -386,6 +369,15 @@ union ceph_mds_request_args { | |||
386 | struct { | 369 | struct { |
387 | struct ceph_file_layout layout; | 370 | struct ceph_file_layout layout; |
388 | } __attribute__ ((packed)) setlayout; | 371 | } __attribute__ ((packed)) setlayout; |
372 | struct { | ||
373 | __u8 rule; /* currently fcntl or flock */ | ||
374 | __u8 type; /* shared, exclusive, remove*/ | ||
375 | __le64 pid; /* process id requesting the lock */ | ||
376 | __le64 pid_namespace; | ||
377 | __le64 start; /* initial location to lock */ | ||
378 | __le64 length; /* num bytes to lock from start */ | ||
379 | __u8 wait; /* will caller wait for lock to become available? */ | ||
380 | } __attribute__ ((packed)) filelock_change; | ||
389 | } __attribute__ ((packed)); | 381 | } __attribute__ ((packed)); |
390 | 382 | ||
391 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | 383 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ |
@@ -480,6 +472,23 @@ struct ceph_mds_reply_dirfrag { | |||
480 | __le32 dist[]; | 472 | __le32 dist[]; |
481 | } __attribute__ ((packed)); | 473 | } __attribute__ ((packed)); |
482 | 474 | ||
475 | #define CEPH_LOCK_FCNTL 1 | ||
476 | #define CEPH_LOCK_FLOCK 2 | ||
477 | |||
478 | #define CEPH_LOCK_SHARED 1 | ||
479 | #define CEPH_LOCK_EXCL 2 | ||
480 | #define CEPH_LOCK_UNLOCK 4 | ||
481 | |||
482 | struct ceph_filelock { | ||
483 | __le64 start;/* file offset to start lock at */ | ||
484 | __le64 length; /* num bytes to lock; 0 for all following start */ | ||
485 | __le64 client; /* which client holds the lock */ | ||
486 | __le64 pid; /* process id holding the lock on the client */ | ||
487 | __le64 pid_namespace; | ||
488 | __u8 type; /* shared lock, exclusive lock, or unlock */ | ||
489 | } __attribute__ ((packed)); | ||
490 | |||
491 | |||
483 | /* file access modes */ | 492 | /* file access modes */ |
484 | #define CEPH_FILE_MODE_PIN 0 | 493 | #define CEPH_FILE_MODE_PIN 0 |
485 | #define CEPH_FILE_MODE_RD 1 | 494 | #define CEPH_FILE_MODE_RD 1 |
@@ -508,9 +517,10 @@ int ceph_flags_to_mode(int flags); | |||
508 | #define CEPH_CAP_SAUTH 2 | 517 | #define CEPH_CAP_SAUTH 2 |
509 | #define CEPH_CAP_SLINK 4 | 518 | #define CEPH_CAP_SLINK 4 |
510 | #define CEPH_CAP_SXATTR 6 | 519 | #define CEPH_CAP_SXATTR 6 |
511 | #define CEPH_CAP_SFILE 8 /* goes at the end (uses >2 cap bits) */ | 520 | #define CEPH_CAP_SFILE 8 |
521 | #define CEPH_CAP_SFLOCK 20 | ||
512 | 522 | ||
513 | #define CEPH_CAP_BITS 16 | 523 | #define CEPH_CAP_BITS 22 |
514 | 524 | ||
515 | /* composed values */ | 525 | /* composed values */ |
516 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) | 526 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) |
@@ -528,6 +538,9 @@ int ceph_flags_to_mode(int flags); | |||
528 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) | 538 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) |
529 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) | 539 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) |
530 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) | 540 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) |
541 | #define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) | ||
542 | #define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) | ||
543 | |||
531 | 544 | ||
532 | /* cap masks (for getattr) */ | 545 | /* cap masks (for getattr) */ |
533 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN | 546 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN |
@@ -563,7 +576,8 @@ int ceph_flags_to_mode(int flags); | |||
563 | CEPH_CAP_FILE_EXCL) | 576 | CEPH_CAP_FILE_EXCL) |
564 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) | 577 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) |
565 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ | 578 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ |
566 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN) | 579 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ |
580 | CEPH_CAP_PIN) | ||
567 | 581 | ||
568 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ | 582 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ |
569 | CEPH_LOCK_IXATTR) | 583 | CEPH_LOCK_IXATTR) |
@@ -653,12 +667,21 @@ struct ceph_mds_cap_reconnect { | |||
653 | __le64 cap_id; | 667 | __le64 cap_id; |
654 | __le32 wanted; | 668 | __le32 wanted; |
655 | __le32 issued; | 669 | __le32 issued; |
670 | __le64 snaprealm; | ||
671 | __le64 pathbase; /* base ino for our path to this ino */ | ||
672 | __le32 flock_len; /* size of flock state blob, if any */ | ||
673 | } __attribute__ ((packed)); | ||
674 | /* followed by flock blob */ | ||
675 | |||
676 | struct ceph_mds_cap_reconnect_v1 { | ||
677 | __le64 cap_id; | ||
678 | __le32 wanted; | ||
679 | __le32 issued; | ||
656 | __le64 size; | 680 | __le64 size; |
657 | struct ceph_timespec mtime, atime; | 681 | struct ceph_timespec mtime, atime; |
658 | __le64 snaprealm; | 682 | __le64 snaprealm; |
659 | __le64 pathbase; /* base ino for our path to this ino */ | 683 | __le64 pathbase; /* base ino for our path to this ino */ |
660 | } __attribute__ ((packed)); | 684 | } __attribute__ ((packed)); |
661 | /* followed by encoded string */ | ||
662 | 685 | ||
663 | struct ceph_mds_snaprealm_reconnect { | 686 | struct ceph_mds_snaprealm_reconnect { |
664 | __le64 ino; /* snap realm base */ | 687 | __le64 ino; /* snap realm base */ |
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h index 5ac470c433c9..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/fs/ceph/ceph_hash.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _FS_CEPH_HASH_H | 1 | #ifndef FS_CEPH_HASH_H |
2 | #define _FS_CEPH_HASH_H | 2 | #define FS_CEPH_HASH_H |
3 | 3 | ||
4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ | 4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ |
5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ | 5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 7503aee828ce..c6179d3a26a2 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
@@ -28,6 +28,7 @@ const char *ceph_osd_op_name(int op) | |||
28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; |
29 | case CEPH_OSD_OP_ZERO: return "zero"; | 29 | case CEPH_OSD_OP_ZERO: return "zero"; |
30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; |
31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
31 | 32 | ||
32 | case CEPH_OSD_OP_APPEND: return "append"; | 33 | case CEPH_OSD_OP_APPEND: return "append"; |
33 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | 34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; |
@@ -129,6 +130,8 @@ const char *ceph_mds_op_name(int op) | |||
129 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | 130 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; |
130 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | 131 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; |
131 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | 132 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; |
133 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; | ||
134 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; | ||
132 | } | 135 | } |
133 | return "???"; | 136 | return "???"; |
134 | } | 137 | } |
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h index dcd7e7523700..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/fs/ceph/crush/crush.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_CRUSH_H | 1 | #ifndef CEPH_CRUSH_CRUSH_H |
2 | #define _CRUSH_CRUSH_H | 2 | #define CEPH_CRUSH_CRUSH_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h index ff48e110e4bb..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/fs/ceph/crush/hash.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_HASH_H | 1 | #ifndef CEPH_CRUSH_HASH_H |
2 | #define _CRUSH_HASH_H | 2 | #define CEPH_CRUSH_HASH_H |
3 | 3 | ||
4 | #define CRUSH_HASH_RJENKINS1 0 | 4 | #define CRUSH_HASH_RJENKINS1 0 |
5 | 5 | ||
diff --git a/fs/ceph/crush/mapper.h b/fs/ceph/crush/mapper.h index 98e90046fd9f..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/fs/ceph/crush/mapper.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_MAPPER_H | 1 | #ifndef CEPH_CRUSH_MAPPER_H |
2 | #define _CRUSH_MAPPER_H | 2 | #define CEPH_CRUSH_MAPPER_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * CRUSH functions for find rules and then mapping an input to an | 5 | * CRUSH functions for find rules and then mapping an input to an |
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index f704b3b62424..a3e627f63293 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c | |||
@@ -75,10 +75,11 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | |||
75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | 75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); |
76 | } | 76 | } |
77 | 77 | ||
78 | const u8 *aes_iv = "cephsageyudagreg"; | 78 | static const u8 *aes_iv = (u8 *)CEPH_AES_IV; |
79 | 79 | ||
80 | int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 80 | static int ceph_aes_encrypt(const void *key, int key_len, |
81 | const void *src, size_t src_len) | 81 | void *dst, size_t *dst_len, |
82 | const void *src, size_t src_len) | ||
82 | { | 83 | { |
83 | struct scatterlist sg_in[2], sg_out[1]; | 84 | struct scatterlist sg_in[2], sg_out[1]; |
84 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 85 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -126,9 +127,10 @@ int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
126 | return 0; | 127 | return 0; |
127 | } | 128 | } |
128 | 129 | ||
129 | int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | 130 | static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, |
130 | const void *src1, size_t src1_len, | 131 | size_t *dst_len, |
131 | const void *src2, size_t src2_len) | 132 | const void *src1, size_t src1_len, |
133 | const void *src2, size_t src2_len) | ||
132 | { | 134 | { |
133 | struct scatterlist sg_in[3], sg_out[1]; | 135 | struct scatterlist sg_in[3], sg_out[1]; |
134 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 136 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -179,8 +181,9 @@ int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | |||
179 | return 0; | 181 | return 0; |
180 | } | 182 | } |
181 | 183 | ||
182 | int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 184 | static int ceph_aes_decrypt(const void *key, int key_len, |
183 | const void *src, size_t src_len) | 185 | void *dst, size_t *dst_len, |
186 | const void *src, size_t src_len) | ||
184 | { | 187 | { |
185 | struct scatterlist sg_in[1], sg_out[2]; | 188 | struct scatterlist sg_in[1], sg_out[2]; |
186 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 189 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -238,10 +241,10 @@ int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
238 | return 0; | 241 | return 0; |
239 | } | 242 | } |
240 | 243 | ||
241 | int ceph_aes_decrypt2(const void *key, int key_len, | 244 | static int ceph_aes_decrypt2(const void *key, int key_len, |
242 | void *dst1, size_t *dst1_len, | 245 | void *dst1, size_t *dst1_len, |
243 | void *dst2, size_t *dst2_len, | 246 | void *dst2, size_t *dst2_len, |
244 | const void *src, size_t src_len) | 247 | const void *src, size_t src_len) |
245 | { | 248 | { |
246 | struct scatterlist sg_in[1], sg_out[3]; | 249 | struct scatterlist sg_in[1], sg_out[3]; |
247 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 250 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h index 40b502e6bd89..bdf38607323c 100644 --- a/fs/ceph/crypto.h +++ b/fs/ceph/crypto.h | |||
@@ -42,7 +42,7 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret, | |||
42 | const void *src2, size_t src2_len); | 42 | const void *src2, size_t src2_len); |
43 | 43 | ||
44 | /* armor.c */ | 44 | /* armor.c */ |
45 | extern int ceph_armor(char *dst, const void *src, const void *end); | 45 | extern int ceph_armor(char *dst, const char *src, const char *end); |
46 | extern int ceph_unarmor(void *dst, const char *src, const char *end); | 46 | extern int ceph_unarmor(char *dst, const char *src, const char *end); |
47 | 47 | ||
48 | #endif | 48 | #endif |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f2f5332ddbba..360c4f22718d 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -291,7 +291,7 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
291 | return 0; | 291 | return 0; |
292 | } | 292 | } |
293 | 293 | ||
294 | #define DEFINE_SHOW_FUNC(name) \ | 294 | #define DEFINE_SHOW_FUNC(name) \ |
295 | static int name##_open(struct inode *inode, struct file *file) \ | 295 | static int name##_open(struct inode *inode, struct file *file) \ |
296 | { \ | 296 | { \ |
297 | struct seq_file *sf; \ | 297 | struct seq_file *sf; \ |
@@ -361,8 +361,8 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
361 | int ret = 0; | 361 | int ret = 0; |
362 | char name[80]; | 362 | char name[80]; |
363 | 363 | ||
364 | snprintf(name, sizeof(name), FSID_FORMAT ".client%lld", | 364 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, |
365 | PR_FSID(&client->fsid), client->monc.auth->global_id); | 365 | client->monc.auth->global_id); |
366 | 366 | ||
367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); |
368 | if (!client->debugfs_dir) | 368 | if (!client->debugfs_dir) |
@@ -432,11 +432,12 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
432 | if (!client->debugfs_caps) | 432 | if (!client->debugfs_caps) |
433 | goto out; | 433 | goto out; |
434 | 434 | ||
435 | client->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", | 435 | client->debugfs_congestion_kb = |
436 | 0600, | 436 | debugfs_create_file("writeback_congestion_kb", |
437 | client->debugfs_dir, | 437 | 0600, |
438 | client, | 438 | client->debugfs_dir, |
439 | &congestion_kb_fops); | 439 | client, |
440 | &congestion_kb_fops); | ||
440 | if (!client->debugfs_congestion_kb) | 441 | if (!client->debugfs_congestion_kb) |
441 | goto out; | 442 | goto out; |
442 | 443 | ||
@@ -466,7 +467,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
466 | debugfs_remove(client->debugfs_dir); | 467 | debugfs_remove(client->debugfs_dir); |
467 | } | 468 | } |
468 | 469 | ||
469 | #else // CONFIG_DEBUG_FS | 470 | #else /* CONFIG_DEBUG_FS */ |
470 | 471 | ||
471 | int __init ceph_debugfs_init(void) | 472 | int __init ceph_debugfs_init(void) |
472 | { | 473 | { |
@@ -486,4 +487,4 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
486 | { | 487 | { |
487 | } | 488 | } |
488 | 489 | ||
489 | #endif // CONFIG_DEBUG_FS | 490 | #endif /* CONFIG_DEBUG_FS */ |
diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h index 65b3e022eaf5..3d25415afe63 100644 --- a/fs/ceph/decode.h +++ b/fs/ceph/decode.h | |||
@@ -99,11 +99,13 @@ static inline void ceph_encode_timespec(struct ceph_timespec *tv, | |||
99 | */ | 99 | */ |
100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) | 100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) |
101 | { | 101 | { |
102 | a->in_addr.ss_family = htons(a->in_addr.ss_family); | 102 | __be16 ss_family = htons(a->in_addr.ss_family); |
103 | a->in_addr.ss_family = *(__u16 *)&ss_family; | ||
103 | } | 104 | } |
104 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) | 105 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) |
105 | { | 106 | { |
106 | a->in_addr.ss_family = ntohs(a->in_addr.ss_family); | 107 | __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; |
108 | a->in_addr.ss_family = ntohs(ss_family); | ||
107 | WARN_ON(a->in_addr.ss_family == 512); | 109 | WARN_ON(a->in_addr.ss_family == 512); |
108 | } | 110 | } |
109 | 111 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f94ed3c7f6a5..67bbb41d5526 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | const struct inode_operations ceph_dir_iops; | 28 | const struct inode_operations ceph_dir_iops; |
29 | const struct file_operations ceph_dir_fops; | 29 | const struct file_operations ceph_dir_fops; |
30 | struct dentry_operations ceph_dentry_ops; | 30 | const struct dentry_operations ceph_dentry_ops; |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Initialize ceph dentry state. | 33 | * Initialize ceph dentry state. |
@@ -94,6 +94,8 @@ static unsigned fpos_off(loff_t p) | |||
94 | */ | 94 | */ |
95 | static int __dcache_readdir(struct file *filp, | 95 | static int __dcache_readdir(struct file *filp, |
96 | void *dirent, filldir_t filldir) | 96 | void *dirent, filldir_t filldir) |
97 | __releases(inode->i_lock) | ||
98 | __acquires(inode->i_lock) | ||
97 | { | 99 | { |
98 | struct inode *inode = filp->f_dentry->d_inode; | 100 | struct inode *inode = filp->f_dentry->d_inode; |
99 | struct ceph_file_info *fi = filp->private_data; | 101 | struct ceph_file_info *fi = filp->private_data; |
@@ -1239,16 +1241,16 @@ const struct inode_operations ceph_dir_iops = { | |||
1239 | .create = ceph_create, | 1241 | .create = ceph_create, |
1240 | }; | 1242 | }; |
1241 | 1243 | ||
1242 | struct dentry_operations ceph_dentry_ops = { | 1244 | const struct dentry_operations ceph_dentry_ops = { |
1243 | .d_revalidate = ceph_d_revalidate, | 1245 | .d_revalidate = ceph_d_revalidate, |
1244 | .d_release = ceph_dentry_release, | 1246 | .d_release = ceph_dentry_release, |
1245 | }; | 1247 | }; |
1246 | 1248 | ||
1247 | struct dentry_operations ceph_snapdir_dentry_ops = { | 1249 | const struct dentry_operations ceph_snapdir_dentry_ops = { |
1248 | .d_revalidate = ceph_snapdir_d_revalidate, | 1250 | .d_revalidate = ceph_snapdir_d_revalidate, |
1249 | .d_release = ceph_dentry_release, | 1251 | .d_release = ceph_dentry_release, |
1250 | }; | 1252 | }; |
1251 | 1253 | ||
1252 | struct dentry_operations ceph_snap_dentry_ops = { | 1254 | const struct dentry_operations ceph_snap_dentry_ops = { |
1253 | .d_release = ceph_dentry_release, | 1255 | .d_release = ceph_dentry_release, |
1254 | }; | 1256 | }; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7c08698fad3e..8c044a4f0457 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -317,7 +317,7 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
317 | /* | 317 | /* |
318 | * allocate a vector new pages | 318 | * allocate a vector new pages |
319 | */ | 319 | */ |
320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | 320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
321 | { | 321 | { |
322 | struct page **pages; | 322 | struct page **pages; |
323 | int i; | 323 | int i; |
@@ -665,7 +665,7 @@ more: | |||
665 | * throw out any page cache pages in this range. this | 665 | * throw out any page cache pages in this range. this |
666 | * may block. | 666 | * may block. |
667 | */ | 667 | */ |
668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
670 | } else { | 670 | } else { |
671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
@@ -740,28 +740,32 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
740 | unsigned long nr_segs, loff_t pos) | 740 | unsigned long nr_segs, loff_t pos) |
741 | { | 741 | { |
742 | struct file *filp = iocb->ki_filp; | 742 | struct file *filp = iocb->ki_filp; |
743 | struct ceph_file_info *fi = filp->private_data; | ||
743 | loff_t *ppos = &iocb->ki_pos; | 744 | loff_t *ppos = &iocb->ki_pos; |
744 | size_t len = iov->iov_len; | 745 | size_t len = iov->iov_len; |
745 | struct inode *inode = filp->f_dentry->d_inode; | 746 | struct inode *inode = filp->f_dentry->d_inode; |
746 | struct ceph_inode_info *ci = ceph_inode(inode); | 747 | struct ceph_inode_info *ci = ceph_inode(inode); |
747 | void *base = iov->iov_base; | 748 | void __user *base = iov->iov_base; |
748 | ssize_t ret; | 749 | ssize_t ret; |
749 | int got = 0; | 750 | int want, got = 0; |
750 | int checkeof = 0, read = 0; | 751 | int checkeof = 0, read = 0; |
751 | 752 | ||
752 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", | 753 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", |
753 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); | 754 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); |
754 | again: | 755 | again: |
755 | __ceph_do_pending_vmtruncate(inode); | 756 | __ceph_do_pending_vmtruncate(inode); |
756 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, | 757 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
757 | &got, -1); | 758 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; |
759 | else | ||
760 | want = CEPH_CAP_FILE_CACHE; | ||
761 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); | ||
758 | if (ret < 0) | 762 | if (ret < 0) |
759 | goto out; | 763 | goto out; |
760 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 764 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
761 | inode, ceph_vinop(inode), pos, (unsigned)len, | 765 | inode, ceph_vinop(inode), pos, (unsigned)len, |
762 | ceph_cap_string(got)); | 766 | ceph_cap_string(got)); |
763 | 767 | ||
764 | if ((got & CEPH_CAP_FILE_CACHE) == 0 || | 768 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || |
765 | (iocb->ki_filp->f_flags & O_DIRECT) || | 769 | (iocb->ki_filp->f_flags & O_DIRECT) || |
766 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) | 770 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) |
767 | /* hmm, this isn't really async... */ | 771 | /* hmm, this isn't really async... */ |
@@ -807,11 +811,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
807 | unsigned long nr_segs, loff_t pos) | 811 | unsigned long nr_segs, loff_t pos) |
808 | { | 812 | { |
809 | struct file *file = iocb->ki_filp; | 813 | struct file *file = iocb->ki_filp; |
814 | struct ceph_file_info *fi = file->private_data; | ||
810 | struct inode *inode = file->f_dentry->d_inode; | 815 | struct inode *inode = file->f_dentry->d_inode; |
811 | struct ceph_inode_info *ci = ceph_inode(inode); | 816 | struct ceph_inode_info *ci = ceph_inode(inode); |
812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
813 | loff_t endoff = pos + iov->iov_len; | 818 | loff_t endoff = pos + iov->iov_len; |
814 | int got = 0; | 819 | int want, got = 0; |
815 | int ret, err; | 820 | int ret, err; |
816 | 821 | ||
817 | if (ceph_snap(inode) != CEPH_NOSNAP) | 822 | if (ceph_snap(inode) != CEPH_NOSNAP) |
@@ -824,8 +829,11 @@ retry_snap: | |||
824 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 829 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
825 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 830 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
826 | inode->i_size); | 831 | inode->i_size); |
827 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, | 832 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
828 | &got, endoff); | 833 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; |
834 | else | ||
835 | want = CEPH_CAP_FILE_BUFFER; | ||
836 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||
829 | if (ret < 0) | 837 | if (ret < 0) |
830 | goto out; | 838 | goto out; |
831 | 839 | ||
@@ -833,7 +841,7 @@ retry_snap: | |||
833 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 841 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
834 | ceph_cap_string(got)); | 842 | ceph_cap_string(got)); |
835 | 843 | ||
836 | if ((got & CEPH_CAP_FILE_BUFFER) == 0 || | 844 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
837 | (iocb->ki_filp->f_flags & O_DIRECT) || | 845 | (iocb->ki_filp->f_flags & O_DIRECT) || |
838 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { | 846 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { |
839 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 847 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
@@ -930,6 +938,8 @@ const struct file_operations ceph_file_fops = { | |||
930 | .aio_write = ceph_aio_write, | 938 | .aio_write = ceph_aio_write, |
931 | .mmap = ceph_mmap, | 939 | .mmap = ceph_mmap, |
932 | .fsync = ceph_fsync, | 940 | .fsync = ceph_fsync, |
941 | .lock = ceph_lock, | ||
942 | .flock = ceph_flock, | ||
933 | .splice_read = generic_file_splice_read, | 943 | .splice_read = generic_file_splice_read, |
934 | .splice_write = generic_file_splice_write, | 944 | .splice_write = generic_file_splice_write, |
935 | .unlocked_ioctl = ceph_ioctl, | 945 | .unlocked_ioctl = ceph_ioctl, |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 389f9dbd9949..5d893d31e399 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -442,8 +442,9 @@ int ceph_fill_file_size(struct inode *inode, int issued, | |||
442 | * the file is either opened or mmaped | 442 | * the file is either opened or mmaped |
443 | */ | 443 | */ |
444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| | 444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| |
445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| | 445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| |
446 | CEPH_CAP_FILE_EXCL)) || | 446 | CEPH_CAP_FILE_EXCL| |
447 | CEPH_CAP_FILE_LAZYIO)) || | ||
447 | mapping_mapped(inode->i_mapping) || | 448 | mapping_mapped(inode->i_mapping) || |
448 | __ceph_caps_file_wanted(ci)) { | 449 | __ceph_caps_file_wanted(ci)) { |
449 | ci->i_truncate_pending++; | 450 | ci->i_truncate_pending++; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index d085f07756b4..76e307d2aba1 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -143,6 +143,27 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
143 | return 0; | 143 | return 0; |
144 | } | 144 | } |
145 | 145 | ||
146 | static long ceph_ioctl_lazyio(struct file *file) | ||
147 | { | ||
148 | struct ceph_file_info *fi = file->private_data; | ||
149 | struct inode *inode = file->f_dentry->d_inode; | ||
150 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
151 | |||
152 | if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { | ||
153 | spin_lock(&inode->i_lock); | ||
154 | ci->i_nr_by_mode[fi->fmode]--; | ||
155 | fi->fmode |= CEPH_FILE_MODE_LAZY; | ||
156 | ci->i_nr_by_mode[fi->fmode]++; | ||
157 | spin_unlock(&inode->i_lock); | ||
158 | dout("ioctl_layzio: file %p marked lazy\n", file); | ||
159 | |||
160 | ceph_check_caps(ci, 0, NULL); | ||
161 | } else { | ||
162 | dout("ioctl_layzio: file %p already lazy\n", file); | ||
163 | } | ||
164 | return 0; | ||
165 | } | ||
166 | |||
146 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 167 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
147 | { | 168 | { |
148 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); | 169 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); |
@@ -155,6 +176,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
155 | 176 | ||
156 | case CEPH_IOC_GET_DATALOC: | 177 | case CEPH_IOC_GET_DATALOC: |
157 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
179 | |||
180 | case CEPH_IOC_LAZYIO: | ||
181 | return ceph_ioctl_lazyio(file); | ||
158 | } | 182 | } |
159 | return -ENOTTY; | 183 | return -ENOTTY; |
160 | } | 184 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 25e4f1a9d059..88451a3b6857 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -37,4 +37,6 @@ struct ceph_ioctl_dataloc { | |||
37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ | 37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ |
38 | struct ceph_ioctl_dataloc) | 38 | struct ceph_ioctl_dataloc) |
39 | 39 | ||
40 | #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) | ||
41 | |||
40 | #endif | 42 | #endif |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c new file mode 100644 index 000000000000..ae85af06454f --- /dev/null +++ b/fs/ceph/locks.c | |||
@@ -0,0 +1,256 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/file.h> | ||
4 | #include <linux/namei.h> | ||
5 | |||
6 | #include "super.h" | ||
7 | #include "mds_client.h" | ||
8 | #include "pagelist.h" | ||
9 | |||
10 | /** | ||
11 | * Implement fcntl and flock locking functions. | ||
12 | */ | ||
13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | ||
14 | u64 pid, u64 pid_ns, | ||
15 | int cmd, u64 start, u64 length, u8 wait) | ||
16 | { | ||
17 | struct inode *inode = file->f_dentry->d_inode; | ||
18 | struct ceph_mds_client *mdsc = | ||
19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
20 | struct ceph_mds_request *req; | ||
21 | int err; | ||
22 | |||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | ||
24 | if (IS_ERR(req)) | ||
25 | return PTR_ERR(req); | ||
26 | req->r_inode = igrab(inode); | ||
27 | |||
28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | ||
30 | (int)operation, pid, start, length, wait, cmd); | ||
31 | |||
32 | req->r_args.filelock_change.rule = lock_type; | ||
33 | req->r_args.filelock_change.type = cmd; | ||
34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | ||
35 | /* This should be adjusted, but I'm not sure if | ||
36 | namespaces actually get id numbers*/ | ||
37 | req->r_args.filelock_change.pid_namespace = | ||
38 | cpu_to_le64((u64)pid_ns); | ||
39 | req->r_args.filelock_change.start = cpu_to_le64(start); | ||
40 | req->r_args.filelock_change.length = cpu_to_le64(length); | ||
41 | req->r_args.filelock_change.wait = wait; | ||
42 | |||
43 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
44 | ceph_mdsc_put_request(req); | ||
45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | ||
47 | (int)operation, pid, start, length, wait, cmd, err); | ||
48 | return err; | ||
49 | } | ||
50 | |||
51 | /** | ||
52 | * Attempt to set an fcntl lock. | ||
53 | * For now, this just goes away to the server. Later it may be more awesome. | ||
54 | */ | ||
55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | ||
56 | { | ||
57 | u64 length; | ||
58 | u8 lock_cmd; | ||
59 | int err; | ||
60 | u8 wait = 0; | ||
61 | u16 op = CEPH_MDS_OP_SETFILELOCK; | ||
62 | |||
63 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
64 | dout("ceph_lock, fl_pid:%d", fl->fl_pid); | ||
65 | |||
66 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | ||
67 | if (F_SETLKW == cmd) | ||
68 | wait = 1; | ||
69 | if (F_GETLK == cmd) | ||
70 | op = CEPH_MDS_OP_GETFILELOCK; | ||
71 | |||
72 | if (F_RDLCK == fl->fl_type) | ||
73 | lock_cmd = CEPH_LOCK_SHARED; | ||
74 | else if (F_WRLCK == fl->fl_type) | ||
75 | lock_cmd = CEPH_LOCK_EXCL; | ||
76 | else | ||
77 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
78 | |||
79 | if (LLONG_MAX == fl->fl_end) | ||
80 | length = 0; | ||
81 | else | ||
82 | length = fl->fl_end - fl->fl_start + 1; | ||
83 | |||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
86 | lock_cmd, fl->fl_start, | ||
87 | length, wait); | ||
88 | if (!err) { | ||
89 | dout("mds locked, locking locally"); | ||
90 | err = posix_lock_file(file, fl, NULL); | ||
91 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | ||
92 | /* undo! This should only happen if the kernel detects | ||
93 | * local deadlock. */ | ||
94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
96 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
97 | length, 0); | ||
98 | dout("got %d on posix_lock_file, undid lock", err); | ||
99 | } | ||
100 | } else { | ||
101 | dout("mds returned error code %d", err); | ||
102 | } | ||
103 | return err; | ||
104 | } | ||
105 | |||
106 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | ||
107 | { | ||
108 | u64 length; | ||
109 | u8 lock_cmd; | ||
110 | int err; | ||
111 | u8 wait = 1; | ||
112 | |||
113 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
114 | dout("ceph_flock, fl_pid:%d", fl->fl_pid); | ||
115 | |||
116 | /* set wait bit, then clear it out of cmd*/ | ||
117 | if (cmd & LOCK_NB) | ||
118 | wait = 0; | ||
119 | cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); | ||
120 | /* set command sequence that Ceph wants to see: | ||
121 | shared lock, exclusive lock, or unlock */ | ||
122 | if (LOCK_SH == cmd) | ||
123 | lock_cmd = CEPH_LOCK_SHARED; | ||
124 | else if (LOCK_EX == cmd) | ||
125 | lock_cmd = CEPH_LOCK_EXCL; | ||
126 | else | ||
127 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
128 | /* mds requires start and length rather than start and end */ | ||
129 | if (LLONG_MAX == fl->fl_end) | ||
130 | length = 0; | ||
131 | else | ||
132 | length = fl->fl_end - fl->fl_start + 1; | ||
133 | |||
134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | ||
135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
136 | lock_cmd, fl->fl_start, | ||
137 | length, wait); | ||
138 | if (!err) { | ||
139 | err = flock_lock_file_wait(file, fl); | ||
140 | if (err) { | ||
141 | ceph_lock_message(CEPH_LOCK_FLOCK, | ||
142 | CEPH_MDS_OP_SETFILELOCK, | ||
143 | file, (u64)fl->fl_pid, | ||
144 | (u64)fl->fl_nspid, | ||
145 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
146 | length, 0); | ||
147 | dout("got %d on flock_lock_file_wait, undid lock", err); | ||
148 | } | ||
149 | } else { | ||
150 | dout("mds error code %d", err); | ||
151 | } | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | /** | ||
156 | * Must be called with BKL already held. Fills in the passed | ||
157 | * counter variables, so you can prepare pagelist metadata before calling | ||
158 | * ceph_encode_locks. | ||
159 | */ | ||
160 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | ||
161 | { | ||
162 | struct file_lock *lock; | ||
163 | |||
164 | *fcntl_count = 0; | ||
165 | *flock_count = 0; | ||
166 | |||
167 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
168 | if (lock->fl_flags & FL_POSIX) | ||
169 | ++(*fcntl_count); | ||
170 | else if (lock->fl_flags & FL_FLOCK) | ||
171 | ++(*flock_count); | ||
172 | } | ||
173 | dout("counted %d flock locks and %d fcntl locks", | ||
174 | *flock_count, *fcntl_count); | ||
175 | } | ||
176 | |||
177 | /** | ||
178 | * Encode the flock and fcntl locks for the given inode into the pagelist. | ||
179 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | ||
180 | * sequential flock locks. | ||
181 | * Must be called with BLK already held, and the lock numbers should have | ||
182 | * been gathered under the same lock holding window. | ||
183 | */ | ||
184 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | ||
185 | int num_fcntl_locks, int num_flock_locks) | ||
186 | { | ||
187 | struct file_lock *lock; | ||
188 | struct ceph_filelock cephlock; | ||
189 | int err = 0; | ||
190 | |||
191 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | ||
192 | num_fcntl_locks); | ||
193 | err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); | ||
194 | if (err) | ||
195 | goto fail; | ||
196 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
197 | if (lock->fl_flags & FL_POSIX) { | ||
198 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
199 | if (err) | ||
200 | goto fail; | ||
201 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
202 | sizeof(struct ceph_filelock)); | ||
203 | } | ||
204 | if (err) | ||
205 | goto fail; | ||
206 | } | ||
207 | |||
208 | err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); | ||
209 | if (err) | ||
210 | goto fail; | ||
211 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
212 | if (lock->fl_flags & FL_FLOCK) { | ||
213 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
214 | if (err) | ||
215 | goto fail; | ||
216 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
217 | sizeof(struct ceph_filelock)); | ||
218 | } | ||
219 | if (err) | ||
220 | goto fail; | ||
221 | } | ||
222 | fail: | ||
223 | return err; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Given a pointer to a lock, convert it to a ceph filelock | ||
228 | */ | ||
229 | int lock_to_ceph_filelock(struct file_lock *lock, | ||
230 | struct ceph_filelock *cephlock) | ||
231 | { | ||
232 | int err = 0; | ||
233 | |||
234 | cephlock->start = cpu_to_le64(lock->fl_start); | ||
235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | ||
236 | cephlock->client = cpu_to_le64(0); | ||
237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | ||
238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | ||
239 | |||
240 | switch (lock->fl_type) { | ||
241 | case F_RDLCK: | ||
242 | cephlock->type = CEPH_LOCK_SHARED; | ||
243 | break; | ||
244 | case F_WRLCK: | ||
245 | cephlock->type = CEPH_LOCK_EXCL; | ||
246 | break; | ||
247 | case F_UNLCK: | ||
248 | cephlock->type = CEPH_LOCK_UNLOCK; | ||
249 | break; | ||
250 | default: | ||
251 | dout("Have unknown lock type %d", lock->fl_type); | ||
252 | err = -EINVAL; | ||
253 | } | ||
254 | |||
255 | return err; | ||
256 | } | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dd440bd438a9..a75ddbf9fe37 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/wait.h> | 3 | #include <linux/wait.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | #include <linux/smp_lock.h> | ||
6 | 7 | ||
7 | #include "mds_client.h" | 8 | #include "mds_client.h" |
8 | #include "mon_client.h" | 9 | #include "mon_client.h" |
@@ -37,6 +38,11 @@ | |||
37 | * are no longer valid. | 38 | * are no longer valid. |
38 | */ | 39 | */ |
39 | 40 | ||
/*
 * State passed to encode_caps_cb() through its opaque arg pointer.
 */
struct ceph_reconnect_state {
	struct ceph_pagelist *pagelist;	/* pagelist encode_caps_cb() works on */
	bool flock;	/* true: build v2 records (with flock_len); false: v1 */
};
45 | |||
40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 46 | static void __wake_requests(struct ceph_mds_client *mdsc, |
41 | struct list_head *head); | 47 | struct list_head *head); |
42 | 48 | ||
@@ -449,7 +455,7 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
449 | kfree(req->r_path1); | 455 | kfree(req->r_path1); |
450 | kfree(req->r_path2); | 456 | kfree(req->r_path2); |
451 | put_request_session(req); | 457 | put_request_session(req); |
452 | ceph_unreserve_caps(&req->r_caps_reservation); | 458 | ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); |
453 | kfree(req); | 459 | kfree(req); |
454 | } | 460 | } |
455 | 461 | ||
@@ -512,7 +518,8 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
512 | { | 518 | { |
513 | req->r_tid = ++mdsc->last_tid; | 519 | req->r_tid = ++mdsc->last_tid; |
514 | if (req->r_num_caps) | 520 | if (req->r_num_caps) |
515 | ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); | 521 | ceph_reserve_caps(mdsc, &req->r_caps_reservation, |
522 | req->r_num_caps); | ||
516 | dout("__register_request %p tid %lld\n", req, req->r_tid); | 523 | dout("__register_request %p tid %lld\n", req, req->r_tid); |
517 | ceph_mdsc_get_request(req); | 524 | ceph_mdsc_get_request(req); |
518 | __insert_request(mdsc, req); | 525 | __insert_request(mdsc, req); |
@@ -704,6 +711,51 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
704 | } | 711 | } |
705 | 712 | ||
706 | /* | 713 | /* |
714 | * open sessions for any export targets for the given mds | ||
715 | * | ||
716 | * called under mdsc->mutex | ||
717 | */ | ||
718 | static void __open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
719 | struct ceph_mds_session *session) | ||
720 | { | ||
721 | struct ceph_mds_info *mi; | ||
722 | struct ceph_mds_session *ts; | ||
723 | int i, mds = session->s_mds; | ||
724 | int target; | ||
725 | |||
726 | if (mds >= mdsc->mdsmap->m_max_mds) | ||
727 | return; | ||
728 | mi = &mdsc->mdsmap->m_info[mds]; | ||
729 | dout("open_export_target_sessions for mds%d (%d targets)\n", | ||
730 | session->s_mds, mi->num_export_targets); | ||
731 | |||
732 | for (i = 0; i < mi->num_export_targets; i++) { | ||
733 | target = mi->export_targets[i]; | ||
734 | ts = __ceph_lookup_mds_session(mdsc, target); | ||
735 | if (!ts) { | ||
736 | ts = register_session(mdsc, target); | ||
737 | if (IS_ERR(ts)) | ||
738 | return; | ||
739 | } | ||
740 | if (session->s_state == CEPH_MDS_SESSION_NEW || | ||
741 | session->s_state == CEPH_MDS_SESSION_CLOSING) | ||
742 | __open_session(mdsc, session); | ||
743 | else | ||
744 | dout(" mds%d target mds%d %p is %s\n", session->s_mds, | ||
745 | i, ts, session_state_name(ts->s_state)); | ||
746 | ceph_put_mds_session(ts); | ||
747 | } | ||
748 | } | ||
749 | |||
/*
 * Locked wrapper around __open_export_target_sessions(): takes
 * mdsc->mutex for the duration of the call.
 */
void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
					  struct ceph_mds_session *session)
{
	mutex_lock(&mdsc->mutex);
	__open_export_target_sessions(mdsc, session);
	mutex_unlock(&mdsc->mutex);
}
757 | |||
758 | /* | ||
707 | * session caps | 759 | * session caps |
708 | */ | 760 | */ |
709 | 761 | ||
@@ -764,7 +816,7 @@ static int iterate_session_caps(struct ceph_mds_session *session, | |||
764 | last_inode = NULL; | 816 | last_inode = NULL; |
765 | } | 817 | } |
766 | if (old_cap) { | 818 | if (old_cap) { |
767 | ceph_put_cap(old_cap); | 819 | ceph_put_cap(session->s_mdsc, old_cap); |
768 | old_cap = NULL; | 820 | old_cap = NULL; |
769 | } | 821 | } |
770 | 822 | ||
@@ -793,7 +845,7 @@ out: | |||
793 | if (last_inode) | 845 | if (last_inode) |
794 | iput(last_inode); | 846 | iput(last_inode); |
795 | if (old_cap) | 847 | if (old_cap) |
796 | ceph_put_cap(old_cap); | 848 | ceph_put_cap(session->s_mdsc, old_cap); |
797 | 849 | ||
798 | return ret; | 850 | return ret; |
799 | } | 851 | } |
@@ -1067,15 +1119,16 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
1067 | * Called under s_mutex. | 1119 | * Called under s_mutex. |
1068 | */ | 1120 | */ |
1069 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 1121 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
1070 | struct ceph_mds_session *session, | 1122 | struct ceph_mds_session *session) |
1071 | int extra) | ||
1072 | { | 1123 | { |
1073 | struct ceph_msg *msg; | 1124 | struct ceph_msg *msg, *partial = NULL; |
1074 | struct ceph_mds_cap_release *head; | 1125 | struct ceph_mds_cap_release *head; |
1075 | int err = -ENOMEM; | 1126 | int err = -ENOMEM; |
1127 | int extra = mdsc->client->mount_args->cap_release_safety; | ||
1128 | int num; | ||
1076 | 1129 | ||
1077 | if (extra < 0) | 1130 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
1078 | extra = mdsc->client->mount_args->cap_release_safety; | 1131 | extra); |
1079 | 1132 | ||
1080 | spin_lock(&session->s_cap_lock); | 1133 | spin_lock(&session->s_cap_lock); |
1081 | 1134 | ||
@@ -1084,9 +1137,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1084 | struct ceph_msg, | 1137 | struct ceph_msg, |
1085 | list_head); | 1138 | list_head); |
1086 | head = msg->front.iov_base; | 1139 | head = msg->front.iov_base; |
1087 | extra += CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | 1140 | num = le32_to_cpu(head->num); |
1141 | if (num) { | ||
1142 | dout(" partial %p with (%d/%d)\n", msg, num, | ||
1143 | (int)CEPH_CAPS_PER_RELEASE); | ||
1144 | extra += CEPH_CAPS_PER_RELEASE - num; | ||
1145 | partial = msg; | ||
1146 | } | ||
1088 | } | 1147 | } |
1089 | |||
1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1148 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
1091 | spin_unlock(&session->s_cap_lock); | 1149 | spin_unlock(&session->s_cap_lock); |
1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1150 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
@@ -1103,19 +1161,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1103 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; | 1161 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; |
1104 | } | 1162 | } |
1105 | 1163 | ||
1106 | if (!list_empty(&session->s_cap_releases)) { | 1164 | if (partial) { |
1107 | msg = list_first_entry(&session->s_cap_releases, | 1165 | head = partial->front.iov_base; |
1108 | struct ceph_msg, | 1166 | num = le32_to_cpu(head->num); |
1109 | list_head); | 1167 | dout(" queueing partial %p with %d/%d\n", partial, num, |
1110 | head = msg->front.iov_base; | 1168 | (int)CEPH_CAPS_PER_RELEASE); |
1111 | if (head->num) { | 1169 | list_move_tail(&partial->list_head, |
1112 | dout(" queueing non-full %p (%d)\n", msg, | 1170 | &session->s_cap_releases_done); |
1113 | le32_to_cpu(head->num)); | 1171 | session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; |
1114 | list_move_tail(&msg->list_head, | ||
1115 | &session->s_cap_releases_done); | ||
1116 | session->s_num_cap_releases -= | ||
1117 | CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | ||
1118 | } | ||
1119 | } | 1172 | } |
1120 | err = 0; | 1173 | err = 0; |
1121 | spin_unlock(&session->s_cap_lock); | 1174 | spin_unlock(&session->s_cap_lock); |
@@ -1250,6 +1303,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1250 | return ERR_PTR(-ENOMEM); | 1303 | return ERR_PTR(-ENOMEM); |
1251 | 1304 | ||
1252 | mutex_init(&req->r_fill_mutex); | 1305 | mutex_init(&req->r_fill_mutex); |
1306 | req->r_mdsc = mdsc; | ||
1253 | req->r_started = jiffies; | 1307 | req->r_started = jiffies; |
1254 | req->r_resend_mds = -1; | 1308 | req->r_resend_mds = -1; |
1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1309 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
@@ -1580,6 +1634,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1580 | 1634 | ||
1581 | req->r_mds = mds; | 1635 | req->r_mds = mds; |
1582 | req->r_attempts++; | 1636 | req->r_attempts++; |
1637 | if (req->r_inode) { | ||
1638 | struct ceph_cap *cap = | ||
1639 | ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); | ||
1640 | |||
1641 | if (cap) | ||
1642 | req->r_sent_on_mseq = cap->mseq; | ||
1643 | else | ||
1644 | req->r_sent_on_mseq = -1; | ||
1645 | } | ||
1583 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, | 1646 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, |
1584 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); | 1647 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); |
1585 | 1648 | ||
@@ -1914,21 +1977,40 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1914 | result = le32_to_cpu(head->result); | 1977 | result = le32_to_cpu(head->result); |
1915 | 1978 | ||
1916 | /* | 1979 | /* |
1917 | * Tolerate 2 consecutive ESTALEs from the same mds. | 1980 | * Handle an ESTALE |
1918 | * FIXME: we should be looking at the cap migrate_seq. | 1981 | * if we're not talking to the authority, send to them |
1982 | * if the authority has changed while we weren't looking, | ||
1983 | * send to new authority | ||
1984 | * Otherwise we just have to return an ESTALE | ||
1919 | */ | 1985 | */ |
1920 | if (result == -ESTALE) { | 1986 | if (result == -ESTALE) { |
1921 | req->r_direct_mode = USE_AUTH_MDS; | 1987 | dout("got ESTALE on request %llu", req->r_tid); |
1922 | req->r_num_stale++; | 1988 | if (!req->r_inode) { |
1923 | if (req->r_num_stale <= 2) { | 1989 | /* do nothing; not an authority problem */ |
1990 | } else if (req->r_direct_mode != USE_AUTH_MDS) { | ||
1991 | dout("not using auth, setting for that now"); | ||
1992 | req->r_direct_mode = USE_AUTH_MDS; | ||
1924 | __do_request(mdsc, req); | 1993 | __do_request(mdsc, req); |
1925 | mutex_unlock(&mdsc->mutex); | 1994 | mutex_unlock(&mdsc->mutex); |
1926 | goto out; | 1995 | goto out; |
1996 | } else { | ||
1997 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); | ||
1998 | struct ceph_cap *cap = | ||
1999 | ceph_get_cap_for_mds(ci, req->r_mds);; | ||
2000 | |||
2001 | dout("already using auth"); | ||
2002 | if ((!cap || cap != ci->i_auth_cap) || | ||
2003 | (cap->mseq != req->r_sent_on_mseq)) { | ||
2004 | dout("but cap changed, so resending"); | ||
2005 | __do_request(mdsc, req); | ||
2006 | mutex_unlock(&mdsc->mutex); | ||
2007 | goto out; | ||
2008 | } | ||
1927 | } | 2009 | } |
1928 | } else { | 2010 | dout("have to return ESTALE on request %llu", req->r_tid); |
1929 | req->r_num_stale = 0; | ||
1930 | } | 2011 | } |
1931 | 2012 | ||
2013 | |||
1932 | if (head->safe) { | 2014 | if (head->safe) { |
1933 | req->r_got_safe = true; | 2015 | req->r_got_safe = true; |
1934 | __unregister_request(mdsc, req); | 2016 | __unregister_request(mdsc, req); |
@@ -1985,7 +2067,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1985 | if (err == 0) { | 2067 | if (err == 0) { |
1986 | if (result == 0 && rinfo->dir_nr) | 2068 | if (result == 0 && rinfo->dir_nr) |
1987 | ceph_readdir_prepopulate(req, req->r_session); | 2069 | ceph_readdir_prepopulate(req, req->r_session); |
1988 | ceph_unreserve_caps(&req->r_caps_reservation); | 2070 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
1989 | } | 2071 | } |
1990 | mutex_unlock(&req->r_fill_mutex); | 2072 | mutex_unlock(&req->r_fill_mutex); |
1991 | 2073 | ||
@@ -2005,7 +2087,7 @@ out_err: | |||
2005 | } | 2087 | } |
2006 | mutex_unlock(&mdsc->mutex); | 2088 | mutex_unlock(&mdsc->mutex); |
2007 | 2089 | ||
2008 | ceph_add_cap_releases(mdsc, req->r_session, -1); | 2090 | ceph_add_cap_releases(mdsc, req->r_session); |
2009 | mutex_unlock(&session->s_mutex); | 2091 | mutex_unlock(&session->s_mutex); |
2010 | 2092 | ||
2011 | /* kick calling process */ | 2093 | /* kick calling process */ |
@@ -2193,9 +2275,14 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2193 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | 2275 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, |
2194 | void *arg) | 2276 | void *arg) |
2195 | { | 2277 | { |
2196 | struct ceph_mds_cap_reconnect rec; | 2278 | union { |
2279 | struct ceph_mds_cap_reconnect v2; | ||
2280 | struct ceph_mds_cap_reconnect_v1 v1; | ||
2281 | } rec; | ||
2282 | size_t reclen; | ||
2197 | struct ceph_inode_info *ci; | 2283 | struct ceph_inode_info *ci; |
2198 | struct ceph_pagelist *pagelist = arg; | 2284 | struct ceph_reconnect_state *recon_state = arg; |
2285 | struct ceph_pagelist *pagelist = recon_state->pagelist; | ||
2199 | char *path; | 2286 | char *path; |
2200 | int pathlen, err; | 2287 | int pathlen, err; |
2201 | u64 pathbase; | 2288 | u64 pathbase; |
@@ -2228,17 +2315,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2228 | spin_lock(&inode->i_lock); | 2315 | spin_lock(&inode->i_lock); |
2229 | cap->seq = 0; /* reset cap seq */ | 2316 | cap->seq = 0; /* reset cap seq */ |
2230 | cap->issue_seq = 0; /* and issue_seq */ | 2317 | cap->issue_seq = 0; /* and issue_seq */ |
2231 | rec.cap_id = cpu_to_le64(cap->cap_id); | 2318 | |
2232 | rec.pathbase = cpu_to_le64(pathbase); | 2319 | if (recon_state->flock) { |
2233 | rec.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | 2320 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
2234 | rec.issued = cpu_to_le32(cap->issued); | 2321 | rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); |
2235 | rec.size = cpu_to_le64(inode->i_size); | 2322 | rec.v2.issued = cpu_to_le32(cap->issued); |
2236 | ceph_encode_timespec(&rec.mtime, &inode->i_mtime); | 2323 | rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); |
2237 | ceph_encode_timespec(&rec.atime, &inode->i_atime); | 2324 | rec.v2.pathbase = cpu_to_le64(pathbase); |
2238 | rec.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | 2325 | rec.v2.flock_len = 0; |
2326 | reclen = sizeof(rec.v2); | ||
2327 | } else { | ||
2328 | rec.v1.cap_id = cpu_to_le64(cap->cap_id); | ||
2329 | rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | ||
2330 | rec.v1.issued = cpu_to_le32(cap->issued); | ||
2331 | rec.v1.size = cpu_to_le64(inode->i_size); | ||
2332 | ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime); | ||
2333 | ceph_encode_timespec(&rec.v1.atime, &inode->i_atime); | ||
2334 | rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | ||
2335 | rec.v1.pathbase = cpu_to_le64(pathbase); | ||
2336 | reclen = sizeof(rec.v1); | ||
2337 | } | ||
2239 | spin_unlock(&inode->i_lock); | 2338 | spin_unlock(&inode->i_lock); |
2240 | 2339 | ||
2241 | err = ceph_pagelist_append(pagelist, &rec, sizeof(rec)); | 2340 | if (recon_state->flock) { |
2341 | int num_fcntl_locks, num_flock_locks; | ||
2342 | |||
2343 | lock_kernel(); | ||
2344 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | ||
2345 | rec.v2.flock_len = (2*sizeof(u32) + | ||
2346 | (num_fcntl_locks+num_flock_locks) * | ||
2347 | sizeof(struct ceph_filelock)); | ||
2348 | |||
2349 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2350 | if (!err) | ||
2351 | err = ceph_encode_locks(inode, pagelist, | ||
2352 | num_fcntl_locks, | ||
2353 | num_flock_locks); | ||
2354 | unlock_kernel(); | ||
2355 | } | ||
2242 | 2356 | ||
2243 | out: | 2357 | out: |
2244 | kfree(path); | 2358 | kfree(path); |
@@ -2267,6 +2381,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2267 | int mds = session->s_mds; | 2381 | int mds = session->s_mds; |
2268 | int err = -ENOMEM; | 2382 | int err = -ENOMEM; |
2269 | struct ceph_pagelist *pagelist; | 2383 | struct ceph_pagelist *pagelist; |
2384 | struct ceph_reconnect_state recon_state; | ||
2270 | 2385 | ||
2271 | pr_info("mds%d reconnect start\n", mds); | 2386 | pr_info("mds%d reconnect start\n", mds); |
2272 | 2387 | ||
@@ -2301,7 +2416,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2301 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2416 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
2302 | if (err) | 2417 | if (err) |
2303 | goto fail; | 2418 | goto fail; |
2304 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2419 | |
2420 | recon_state.pagelist = pagelist; | ||
2421 | recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; | ||
2422 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | ||
2305 | if (err < 0) | 2423 | if (err < 0) |
2306 | goto fail; | 2424 | goto fail; |
2307 | 2425 | ||
@@ -2326,6 +2444,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2326 | } | 2444 | } |
2327 | 2445 | ||
2328 | reply->pagelist = pagelist; | 2446 | reply->pagelist = pagelist; |
2447 | if (recon_state.flock) | ||
2448 | reply->hdr.version = cpu_to_le16(2); | ||
2329 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2449 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
2330 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2450 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2331 | ceph_con_send(&session->s_con, reply); | 2451 | ceph_con_send(&session->s_con, reply); |
@@ -2376,9 +2496,11 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2376 | oldstate = ceph_mdsmap_get_state(oldmap, i); | 2496 | oldstate = ceph_mdsmap_get_state(oldmap, i); |
2377 | newstate = ceph_mdsmap_get_state(newmap, i); | 2497 | newstate = ceph_mdsmap_get_state(newmap, i); |
2378 | 2498 | ||
2379 | dout("check_new_map mds%d state %s -> %s (session %s)\n", | 2499 | dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", |
2380 | i, ceph_mds_state_name(oldstate), | 2500 | i, ceph_mds_state_name(oldstate), |
2501 | ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", | ||
2381 | ceph_mds_state_name(newstate), | 2502 | ceph_mds_state_name(newstate), |
2503 | ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", | ||
2382 | session_state_name(s->s_state)); | 2504 | session_state_name(s->s_state)); |
2383 | 2505 | ||
2384 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), | 2506 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), |
@@ -2428,6 +2550,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2428 | wake_up_session_caps(s, 1); | 2550 | wake_up_session_caps(s, 1); |
2429 | } | 2551 | } |
2430 | } | 2552 | } |
2553 | |||
2554 | for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) { | ||
2555 | s = mdsc->sessions[i]; | ||
2556 | if (!s) | ||
2557 | continue; | ||
2558 | if (!ceph_mdsmap_is_laggy(newmap, i)) | ||
2559 | continue; | ||
2560 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | ||
2561 | s->s_state == CEPH_MDS_SESSION_HUNG || | ||
2562 | s->s_state == CEPH_MDS_SESSION_CLOSING) { | ||
2563 | dout(" connecting to export targets of laggy mds%d\n", | ||
2564 | i); | ||
2565 | __open_export_target_sessions(mdsc, s); | ||
2566 | } | ||
2567 | } | ||
2431 | } | 2568 | } |
2432 | 2569 | ||
2433 | 2570 | ||
@@ -2715,7 +2852,7 @@ static void delayed_work(struct work_struct *work) | |||
2715 | send_renew_caps(mdsc, s); | 2852 | send_renew_caps(mdsc, s); |
2716 | else | 2853 | else |
2717 | ceph_con_keepalive(&s->s_con); | 2854 | ceph_con_keepalive(&s->s_con); |
2718 | ceph_add_cap_releases(mdsc, s, -1); | 2855 | ceph_add_cap_releases(mdsc, s); |
2719 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | 2856 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
2720 | s->s_state == CEPH_MDS_SESSION_HUNG) | 2857 | s->s_state == CEPH_MDS_SESSION_HUNG) |
2721 | ceph_send_cap_releases(mdsc, s); | 2858 | ceph_send_cap_releases(mdsc, s); |
@@ -2764,6 +2901,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2764 | spin_lock_init(&mdsc->dentry_lru_lock); | 2901 | spin_lock_init(&mdsc->dentry_lru_lock); |
2765 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2902 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2766 | 2903 | ||
2904 | ceph_caps_init(mdsc); | ||
2905 | ceph_adjust_min_caps(mdsc, client->min_caps); | ||
2906 | |||
2767 | return 0; | 2907 | return 0; |
2768 | } | 2908 | } |
2769 | 2909 | ||
@@ -2959,6 +3099,7 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
2959 | if (mdsc->mdsmap) | 3099 | if (mdsc->mdsmap) |
2960 | ceph_mdsmap_destroy(mdsc->mdsmap); | 3100 | ceph_mdsmap_destroy(mdsc->mdsmap); |
2961 | kfree(mdsc->sessions); | 3101 | kfree(mdsc->sessions); |
3102 | ceph_caps_finalize(mdsc); | ||
2962 | } | 3103 | } |
2963 | 3104 | ||
2964 | 3105 | ||
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 952410c60d09..ab7e89f5e344 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -151,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, | |||
151 | struct ceph_mds_request { | 151 | struct ceph_mds_request { |
152 | u64 r_tid; /* transaction id */ | 152 | u64 r_tid; /* transaction id */ |
153 | struct rb_node r_node; | 153 | struct rb_node r_node; |
154 | struct ceph_mds_client *r_mdsc; | ||
154 | 155 | ||
155 | int r_op; /* mds op code */ | 156 | int r_op; /* mds op code */ |
156 | int r_mds; | 157 | int r_mds; |
@@ -207,8 +208,8 @@ struct ceph_mds_request { | |||
207 | 208 | ||
208 | int r_attempts; /* resend attempts */ | 209 | int r_attempts; /* resend attempts */ |
209 | int r_num_fwd; /* number of forward attempts */ | 210 | int r_num_fwd; /* number of forward attempts */ |
210 | int r_num_stale; | ||
211 | int r_resend_mds; /* mds to resend to next, if any*/ | 211 | int r_resend_mds; /* mds to resend to next, if any*/ |
212 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ | ||
212 | 213 | ||
213 | struct kref r_kref; | 214 | struct kref r_kref; |
214 | struct list_head r_wait; | 215 | struct list_head r_wait; |
@@ -267,6 +268,27 @@ struct ceph_mds_client { | |||
267 | spinlock_t cap_dirty_lock; /* protects above items */ | 268 | spinlock_t cap_dirty_lock; /* protects above items */ |
268 | wait_queue_head_t cap_flushing_wq; | 269 | wait_queue_head_t cap_flushing_wq; |
269 | 270 | ||
271 | /* | ||
272 | * Cap reservations | ||
273 | * | ||
274 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
275 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
276 | * memory needed to successfully process an MDS response. (If an MDS | ||
277 | * sends us cap information and we fail to process it, we will have | ||
278 | * problems due to the client and MDS being out of sync.) | ||
279 | * | ||
280 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
281 | */ | ||
282 | spinlock_t caps_list_lock; | ||
283 | struct list_head caps_list; /* unused (reserved or | ||
284 | unreserved) */ | ||
285 | int caps_total_count; /* total caps allocated */ | ||
286 | int caps_use_count; /* in use */ | ||
287 | int caps_reserve_count; /* unused, reserved */ | ||
288 | int caps_avail_count; /* unused, unreserved */ | ||
289 | int caps_min_count; /* keep at least this many | ||
290 | (unreserved) */ | ||
291 | |||
270 | #ifdef CONFIG_DEBUG_FS | 292 | #ifdef CONFIG_DEBUG_FS |
271 | struct dentry *debugfs_file; | 293 | struct dentry *debugfs_file; |
272 | #endif | 294 | #endif |
@@ -324,8 +346,7 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) | |||
324 | } | 346 | } |
325 | 347 | ||
326 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 348 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
327 | struct ceph_mds_session *session, | 349 | struct ceph_mds_session *session); |
328 | int extra); | ||
329 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, | 350 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
330 | struct ceph_mds_session *session); | 351 | struct ceph_mds_session *session); |
331 | 352 | ||
@@ -343,4 +364,7 @@ extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
343 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, | 364 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, |
344 | struct ceph_msg *msg); | 365 | struct ceph_msg *msg); |
345 | 366 | ||
367 | extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
368 | struct ceph_mds_session *session); | ||
369 | |||
346 | #endif | 370 | #endif |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index c4c498e6dfef..040be6d1150b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -85,6 +85,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
85 | struct ceph_entity_addr addr; | 85 | struct ceph_entity_addr addr; |
86 | u32 num_export_targets; | 86 | u32 num_export_targets; |
87 | void *pexport_targets = NULL; | 87 | void *pexport_targets = NULL; |
88 | struct ceph_timespec laggy_since; | ||
88 | 89 | ||
89 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); | 90 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); |
90 | global_id = ceph_decode_64(p); | 91 | global_id = ceph_decode_64(p); |
@@ -103,7 +104,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
103 | state_seq = ceph_decode_64(p); | 104 | state_seq = ceph_decode_64(p); |
104 | ceph_decode_copy(p, &addr, sizeof(addr)); | 105 | ceph_decode_copy(p, &addr, sizeof(addr)); |
105 | ceph_decode_addr(&addr); | 106 | ceph_decode_addr(&addr); |
106 | *p += sizeof(struct ceph_timespec); | 107 | ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); |
107 | *p += sizeof(u32); | 108 | *p += sizeof(u32); |
108 | ceph_decode_32_safe(p, end, namelen, bad); | 109 | ceph_decode_32_safe(p, end, namelen, bad); |
109 | *p += namelen; | 110 | *p += namelen; |
@@ -122,6 +123,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
122 | m->m_info[mds].global_id = global_id; | 123 | m->m_info[mds].global_id = global_id; |
123 | m->m_info[mds].state = state; | 124 | m->m_info[mds].state = state; |
124 | m->m_info[mds].addr = addr; | 125 | m->m_info[mds].addr = addr; |
126 | m->m_info[mds].laggy = | ||
127 | (laggy_since.tv_sec != 0 || | ||
128 | laggy_since.tv_nsec != 0); | ||
125 | m->m_info[mds].num_export_targets = num_export_targets; | 129 | m->m_info[mds].num_export_targets = num_export_targets; |
126 | if (num_export_targets) { | 130 | if (num_export_targets) { |
127 | m->m_info[mds].export_targets = | 131 | m->m_info[mds].export_targets = |
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h index eacc131aa5cb..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h | |||
@@ -13,6 +13,7 @@ struct ceph_mds_info { | |||
13 | struct ceph_entity_addr addr; | 13 | struct ceph_entity_addr addr; |
14 | s32 state; | 14 | s32 state; |
15 | int num_export_targets; | 15 | int num_export_targets; |
16 | bool laggy; | ||
16 | u32 *export_targets; | 17 | u32 *export_targets; |
17 | }; | 18 | }; |
18 | 19 | ||
@@ -47,6 +48,13 @@ static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) | |||
47 | return m->m_info[w].state; | 48 | return m->m_info[w].state; |
48 | } | 49 | } |
49 | 50 | ||
51 | static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | ||
52 | { | ||
53 | if (w >= 0 && w < m->m_max_mds) | ||
54 | return m->m_info[w].laggy; | ||
55 | return false; | ||
56 | } | ||
57 | |||
50 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | 58 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); |
51 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | 59 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); |
52 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | 60 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 15167b2daa55..2502d76fcec1 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -108,7 +108,7 @@ void ceph_msgr_exit(void) | |||
108 | destroy_workqueue(ceph_msgr_wq); | 108 | destroy_workqueue(ceph_msgr_wq); |
109 | } | 109 | } |
110 | 110 | ||
111 | void ceph_msgr_flush() | 111 | void ceph_msgr_flush(void) |
112 | { | 112 | { |
113 | flush_workqueue(ceph_msgr_wq); | 113 | flush_workqueue(ceph_msgr_wq); |
114 | } | 114 | } |
@@ -647,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
648 | con->connect_seq, global_seq, proto); | 648 | con->connect_seq, global_seq, proto); |
649 | 649 | ||
650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); | 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); |
651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1081,11 +1081,11 @@ static int process_banner(struct ceph_connection *con) | |||
1081 | sizeof(con->peer_addr)) != 0 && | 1081 | sizeof(con->peer_addr)) != 0 && |
1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
1084 | pr_warning("wrong peer, want %s/%lld, got %s/%lld\n", | 1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
1085 | pr_addr(&con->peer_addr.in_addr), | 1085 | pr_addr(&con->peer_addr.in_addr), |
1086 | le64_to_cpu(con->peer_addr.nonce), | 1086 | (int)le32_to_cpu(con->peer_addr.nonce), |
1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1087 | pr_addr(&con->actual_peer_addr.in_addr), |
1088 | le64_to_cpu(con->actual_peer_addr.nonce)); | 1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
1089 | con->error_msg = "wrong peer at address"; | 1089 | con->error_msg = "wrong peer at address"; |
1090 | return -1; | 1090 | return -1; |
1091 | } | 1091 | } |
@@ -1123,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1123 | 1123 | ||
1124 | static int process_connect(struct ceph_connection *con) | 1124 | static int process_connect(struct ceph_connection *con) |
1125 | { | 1125 | { |
1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; | 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; |
1127 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; | 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; |
1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1129 | 1129 | ||
1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1302,8 +1302,8 @@ static void process_ack(struct ceph_connection *con) | |||
1302 | 1302 | ||
1303 | 1303 | ||
1304 | static int read_partial_message_section(struct ceph_connection *con, | 1304 | static int read_partial_message_section(struct ceph_connection *con, |
1305 | struct kvec *section, unsigned int sec_len, | 1305 | struct kvec *section, |
1306 | u32 *crc) | 1306 | unsigned int sec_len, u32 *crc) |
1307 | { | 1307 | { |
1308 | int left; | 1308 | int left; |
1309 | int ret; | 1309 | int ret; |
@@ -1434,7 +1434,8 @@ static int read_partial_message(struct ceph_connection *con) | |||
1434 | 1434 | ||
1435 | /* middle */ | 1435 | /* middle */ |
1436 | if (m->middle) { | 1436 | if (m->middle) { |
1437 | ret = read_partial_message_section(con, &m->middle->vec, middle_len, | 1437 | ret = read_partial_message_section(con, &m->middle->vec, |
1438 | middle_len, | ||
1438 | &con->in_middle_crc); | 1439 | &con->in_middle_crc); |
1439 | if (ret <= 0) | 1440 | if (ret <= 0) |
1440 | return ret; | 1441 | return ret; |
@@ -1920,7 +1921,7 @@ out: | |||
1920 | /* | 1921 | /* |
1921 | * in case we faulted due to authentication, invalidate our | 1922 | * in case we faulted due to authentication, invalidate our |
1922 | * current tickets so that we can get new ones. | 1923 | * current tickets so that we can get new ones. |
1923 | */ | 1924 | */ |
1924 | if (con->auth_retry && con->ops->invalidate_authorizer) { | 1925 | if (con->auth_retry && con->ops->invalidate_authorizer) { |
1925 | dout("calling invalidate_authorizer()\n"); | 1926 | dout("calling invalidate_authorizer()\n"); |
1926 | con->ops->invalidate_authorizer(con); | 1927 | con->ops->invalidate_authorizer(con); |
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 54fe01c50706..b2a5a3e4a671 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
@@ -349,7 +349,7 @@ out: | |||
349 | } | 349 | } |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * statfs | 352 | * generic requests (e.g., statfs, poolop) |
353 | */ | 353 | */ |
354 | static struct ceph_mon_generic_request *__lookup_generic_req( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
355 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
@@ -442,6 +442,35 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | |||
442 | return m; | 442 | return m; |
443 | } | 443 | } |
444 | 444 | ||
445 | static int do_generic_request(struct ceph_mon_client *monc, | ||
446 | struct ceph_mon_generic_request *req) | ||
447 | { | ||
448 | int err; | ||
449 | |||
450 | /* register request */ | ||
451 | mutex_lock(&monc->mutex); | ||
452 | req->tid = ++monc->last_tid; | ||
453 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
454 | __insert_generic_request(monc, req); | ||
455 | monc->num_generic_requests++; | ||
456 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
457 | mutex_unlock(&monc->mutex); | ||
458 | |||
459 | err = wait_for_completion_interruptible(&req->completion); | ||
460 | |||
461 | mutex_lock(&monc->mutex); | ||
462 | rb_erase(&req->node, &monc->generic_request_tree); | ||
463 | monc->num_generic_requests--; | ||
464 | mutex_unlock(&monc->mutex); | ||
465 | |||
466 | if (!err) | ||
467 | err = req->result; | ||
468 | return err; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * statfs | ||
473 | */ | ||
445 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 474 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
446 | struct ceph_msg *msg) | 475 | struct ceph_msg *msg) |
447 | { | 476 | { |
@@ -468,7 +497,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, | |||
468 | return; | 497 | return; |
469 | 498 | ||
470 | bad: | 499 | bad: |
471 | pr_err("corrupt generic reply, no tid\n"); | 500 | pr_err("corrupt generic reply, tid %llu\n", tid); |
472 | ceph_msg_dump(msg); | 501 | ceph_msg_dump(msg); |
473 | } | 502 | } |
474 | 503 | ||
@@ -487,6 +516,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
487 | 516 | ||
488 | kref_init(&req->kref); | 517 | kref_init(&req->kref); |
489 | req->buf = buf; | 518 | req->buf = buf; |
519 | req->buf_len = sizeof(*buf); | ||
490 | init_completion(&req->completion); | 520 | init_completion(&req->completion); |
491 | 521 | ||
492 | err = -ENOMEM; | 522 | err = -ENOMEM; |
@@ -504,33 +534,134 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
504 | h->monhdr.session_mon_tid = 0; | 534 | h->monhdr.session_mon_tid = 0; |
505 | h->fsid = monc->monmap->fsid; | 535 | h->fsid = monc->monmap->fsid; |
506 | 536 | ||
507 | /* register request */ | 537 | err = do_generic_request(monc, req); |
508 | mutex_lock(&monc->mutex); | ||
509 | req->tid = ++monc->last_tid; | ||
510 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
511 | __insert_generic_request(monc, req); | ||
512 | monc->num_generic_requests++; | ||
513 | mutex_unlock(&monc->mutex); | ||
514 | 538 | ||
515 | /* send request and wait */ | 539 | out: |
516 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | 540 | kref_put(&req->kref, release_generic_request); |
517 | err = wait_for_completion_interruptible(&req->completion); | 541 | return err; |
542 | } | ||
543 | |||
544 | /* | ||
545 | * pool ops | ||
546 | */ | ||
547 | static int get_poolop_reply_buf(const char *src, size_t src_len, | ||
548 | char *dst, size_t dst_len) | ||
549 | { | ||
550 | u32 buf_len; | ||
551 | |||
552 | if (src_len != sizeof(u32) + dst_len) | ||
553 | return -EINVAL; | ||
554 | |||
555 | buf_len = le32_to_cpu(*(u32 *)src); | ||
556 | if (buf_len != dst_len) | ||
557 | return -EINVAL; | ||
558 | |||
559 | memcpy(dst, src + sizeof(u32), dst_len); | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | static void handle_poolop_reply(struct ceph_mon_client *monc, | ||
564 | struct ceph_msg *msg) | ||
565 | { | ||
566 | struct ceph_mon_generic_request *req; | ||
567 | struct ceph_mon_poolop_reply *reply = msg->front.iov_base; | ||
568 | u64 tid = le64_to_cpu(msg->hdr.tid); | ||
569 | |||
570 | if (msg->front.iov_len < sizeof(*reply)) | ||
571 | goto bad; | ||
572 | dout("handle_poolop_reply %p tid %llu\n", msg, tid); | ||
518 | 573 | ||
519 | mutex_lock(&monc->mutex); | 574 | mutex_lock(&monc->mutex); |
520 | rb_erase(&req->node, &monc->generic_request_tree); | 575 | req = __lookup_generic_req(monc, tid); |
521 | monc->num_generic_requests--; | 576 | if (req) { |
577 | if (req->buf_len && | ||
578 | get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), | ||
579 | msg->front.iov_len - sizeof(*reply), | ||
580 | req->buf, req->buf_len) < 0) { | ||
581 | mutex_unlock(&monc->mutex); | ||
582 | goto bad; | ||
583 | } | ||
584 | req->result = le32_to_cpu(reply->reply_code); | ||
585 | get_generic_request(req); | ||
586 | } | ||
522 | mutex_unlock(&monc->mutex); | 587 | mutex_unlock(&monc->mutex); |
588 | if (req) { | ||
589 | complete(&req->completion); | ||
590 | put_generic_request(req); | ||
591 | } | ||
592 | return; | ||
523 | 593 | ||
524 | if (!err) | 594 | bad: |
525 | err = req->result; | 595 | pr_err("corrupt generic reply, tid %llu\n", tid); |
596 | ceph_msg_dump(msg); | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Do a synchronous pool op. | ||
601 | */ | ||
602 | int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, | ||
603 | u32 pool, u64 snapid, | ||
604 | char *buf, int len) | ||
605 | { | ||
606 | struct ceph_mon_generic_request *req; | ||
607 | struct ceph_mon_poolop *h; | ||
608 | int err; | ||
609 | |||
610 | req = kzalloc(sizeof(*req), GFP_NOFS); | ||
611 | if (!req) | ||
612 | return -ENOMEM; | ||
613 | |||
614 | kref_init(&req->kref); | ||
615 | req->buf = buf; | ||
616 | req->buf_len = len; | ||
617 | init_completion(&req->completion); | ||
618 | |||
619 | err = -ENOMEM; | ||
620 | req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS); | ||
621 | if (!req->request) | ||
622 | goto out; | ||
623 | req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS); | ||
624 | if (!req->reply) | ||
625 | goto out; | ||
626 | |||
627 | /* fill out request */ | ||
628 | req->request->hdr.version = cpu_to_le16(2); | ||
629 | h = req->request->front.iov_base; | ||
630 | h->monhdr.have_version = 0; | ||
631 | h->monhdr.session_mon = cpu_to_le16(-1); | ||
632 | h->monhdr.session_mon_tid = 0; | ||
633 | h->fsid = monc->monmap->fsid; | ||
634 | h->pool = cpu_to_le32(pool); | ||
635 | h->op = cpu_to_le32(op); | ||
636 | h->auid = 0; | ||
637 | h->snapid = cpu_to_le64(snapid); | ||
638 | h->name_len = 0; | ||
639 | |||
640 | err = do_generic_request(monc, req); | ||
526 | 641 | ||
527 | out: | 642 | out: |
528 | kref_put(&req->kref, release_generic_request); | 643 | kref_put(&req->kref, release_generic_request); |
529 | return err; | 644 | return err; |
530 | } | 645 | } |
531 | 646 | ||
647 | int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
648 | u32 pool, u64 *snapid) | ||
649 | { | ||
650 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
651 | pool, 0, (char *)snapid, sizeof(*snapid)); | ||
652 | |||
653 | } | ||
654 | |||
655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
656 | u32 pool, u64 snapid) | ||
657 | { | ||
658 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
659 | pool, snapid, 0, 0); | ||
660 | |||
661 | } | ||
662 | |||
532 | /* | 663 | /* |
533 | * Resend pending statfs requests. | 664 | * Resend pending generic requests. |
534 | */ | 665 | */ |
535 | static void __resend_generic_request(struct ceph_mon_client *monc) | 666 | static void __resend_generic_request(struct ceph_mon_client *monc) |
536 | { | 667 | { |
@@ -783,6 +914,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
783 | handle_statfs_reply(monc, msg); | 914 | handle_statfs_reply(monc, msg); |
784 | break; | 915 | break; |
785 | 916 | ||
917 | case CEPH_MSG_POOLOP_REPLY: | ||
918 | handle_poolop_reply(monc, msg); | ||
919 | break; | ||
920 | |||
786 | case CEPH_MSG_MON_MAP: | 921 | case CEPH_MSG_MON_MAP: |
787 | ceph_monc_handle_map(monc, msg); | 922 | ceph_monc_handle_map(monc, msg); |
788 | break; | 923 | break; |
@@ -820,6 +955,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
820 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 955 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
821 | m = ceph_msg_get(monc->m_subscribe_ack); | 956 | m = ceph_msg_get(monc->m_subscribe_ack); |
822 | break; | 957 | break; |
958 | case CEPH_MSG_POOLOP_REPLY: | ||
823 | case CEPH_MSG_STATFS_REPLY: | 959 | case CEPH_MSG_STATFS_REPLY: |
824 | return get_generic_reply(con, hdr, skip); | 960 | return get_generic_reply(con, hdr, skip); |
825 | case CEPH_MSG_AUTH_REPLY: | 961 | case CEPH_MSG_AUTH_REPLY: |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 174d794321d0..8e396f2c0963 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
@@ -50,6 +50,7 @@ struct ceph_mon_generic_request { | |||
50 | struct rb_node node; | 50 | struct rb_node node; |
51 | int result; | 51 | int result; |
52 | void *buf; | 52 | void *buf; |
53 | int buf_len; | ||
53 | struct completion completion; | 54 | struct completion completion; |
54 | struct ceph_msg *request; /* original request */ | 55 | struct ceph_msg *request; /* original request */ |
55 | struct ceph_msg *reply; /* and reply */ | 56 | struct ceph_msg *reply; /* and reply */ |
@@ -111,6 +112,10 @@ extern int ceph_monc_open_session(struct ceph_mon_client *monc); | |||
111 | 112 | ||
112 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); | 113 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); |
113 | 114 | ||
115 | extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
116 | u32 pool, u64 *snapid); | ||
114 | 117 | ||
118 | extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
119 | u32 pool, u64 snapid); | ||
115 | 120 | ||
116 | #endif | 121 | #endif |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 892a0298dfdf..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef __MSGR_H | 1 | #ifndef CEPH_MSGR_H |
2 | #define __MSGR_H | 2 | #define CEPH_MSGR_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Data types for message passing layer used by Ceph. | 5 | * Data types for message passing layer used by Ceph. |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index e38522347898..bed6391e52c7 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -1276,8 +1276,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1276 | 1276 | ||
1277 | /* it may be a short read due to an object boundary */ | 1277 | /* it may be a short read due to an object boundary */ |
1278 | req->r_pages = pages; | 1278 | req->r_pages = pages; |
1279 | num_pages = calc_pages_for(off, *plen); | ||
1280 | req->r_num_pages = num_pages; | ||
1281 | 1279 | ||
1282 | dout("readpages final extent is %llu~%llu (%d pages)\n", | 1280 | dout("readpages final extent is %llu~%llu (%d pages)\n", |
1283 | off, *plen, req->r_num_pages); | 1281 | off, *plen, req->r_num_pages); |
@@ -1319,7 +1317,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1319 | 1317 | ||
1320 | /* it may be a short write due to an object boundary */ | 1318 | /* it may be a short write due to an object boundary */ |
1321 | req->r_pages = pages; | 1319 | req->r_pages = pages; |
1322 | req->r_num_pages = calc_pages_for(off, len); | ||
1323 | dout("writepages %llu~%llu (%d pages)\n", off, len, | 1320 | dout("writepages %llu~%llu (%d pages)\n", off, len, |
1324 | req->r_num_pages); | 1321 | req->r_num_pages); |
1325 | 1322 | ||
@@ -1476,8 +1473,8 @@ static void put_osd_con(struct ceph_connection *con) | |||
1476 | * authentication | 1473 | * authentication |
1477 | */ | 1474 | */ |
1478 | static int get_authorizer(struct ceph_connection *con, | 1475 | static int get_authorizer(struct ceph_connection *con, |
1479 | void **buf, int *len, int *proto, | 1476 | void **buf, int *len, int *proto, |
1480 | void **reply_buf, int *reply_len, int force_new) | 1477 | void **reply_buf, int *reply_len, int force_new) |
1481 | { | 1478 | { |
1482 | struct ceph_osd *o = con->private; | 1479 | struct ceph_osd *o = con->private; |
1483 | struct ceph_osd_client *osdc = o->o_osdc; | 1480 | struct ceph_osd_client *osdc = o->o_osdc; |
@@ -1497,7 +1494,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
1497 | &o->o_authorizer_reply_buf, | 1494 | &o->o_authorizer_reply_buf, |
1498 | &o->o_authorizer_reply_buf_len); | 1495 | &o->o_authorizer_reply_buf_len); |
1499 | if (ret) | 1496 | if (ret) |
1500 | return ret; | 1497 | return ret; |
1501 | } | 1498 | } |
1502 | 1499 | ||
1503 | *proto = ac->protocol; | 1500 | *proto = ac->protocol; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 416d46adbf87..e31f118f1392 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -424,12 +424,30 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | |||
424 | kfree(pi); | 424 | kfree(pi); |
425 | } | 425 | } |
426 | 426 | ||
427 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | 427 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) |
428 | { | 428 | { |
429 | unsigned n, m; | ||
430 | |||
429 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 431 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); |
430 | calc_pg_masks(pi); | 432 | calc_pg_masks(pi); |
431 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | 433 | |
434 | /* num_snaps * snap_info_t */ | ||
435 | n = le32_to_cpu(pi->v.num_snaps); | ||
436 | while (n--) { | ||
437 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | ||
438 | sizeof(struct ceph_timespec), bad); | ||
439 | *p += sizeof(u64) + /* key */ | ||
440 | 1 + sizeof(u64) + /* u8, snapid */ | ||
441 | sizeof(struct ceph_timespec); | ||
442 | m = ceph_decode_32(p); /* snap name */ | ||
443 | *p += m; | ||
444 | } | ||
445 | |||
432 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 446 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; |
447 | return 0; | ||
448 | |||
449 | bad: | ||
450 | return -EINVAL; | ||
433 | } | 451 | } |
434 | 452 | ||
435 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 453 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) |
@@ -571,7 +589,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
571 | kfree(pi); | 589 | kfree(pi); |
572 | goto bad; | 590 | goto bad; |
573 | } | 591 | } |
574 | __decode_pool(p, pi); | 592 | err = __decode_pool(p, end, pi); |
593 | if (err < 0) | ||
594 | goto bad; | ||
575 | __insert_pg_pool(&map->pg_pools, pi); | 595 | __insert_pg_pool(&map->pg_pools, pi); |
576 | } | 596 | } |
577 | 597 | ||
@@ -760,7 +780,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
760 | pi->id = pool; | 780 | pi->id = pool; |
761 | __insert_pg_pool(&map->pg_pools, pi); | 781 | __insert_pg_pool(&map->pg_pools, pi); |
762 | } | 782 | } |
763 | __decode_pool(p, pi); | 783 | err = __decode_pool(p, end, pi); |
784 | if (err < 0) | ||
785 | goto bad; | ||
764 | } | 786 | } |
765 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | 787 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) |
766 | goto bad; | 788 | goto bad; |
@@ -833,7 +855,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
833 | node)->pgid, pgid) <= 0) { | 855 | node)->pgid, pgid) <= 0) { |
834 | struct ceph_pg_mapping *cur = | 856 | struct ceph_pg_mapping *cur = |
835 | rb_entry(rbp, struct ceph_pg_mapping, node); | 857 | rb_entry(rbp, struct ceph_pg_mapping, node); |
836 | 858 | ||
837 | rbp = rb_next(rbp); | 859 | rbp = rb_next(rbp); |
838 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); | 860 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); |
839 | rb_erase(&cur->node, &map->pg_temp); | 861 | rb_erase(&cur->node, &map->pg_temp); |
@@ -1026,8 +1048,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1026 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 1048 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, |
1027 | pool->v.type, pool->v.size); | 1049 | pool->v.type, pool->v.size); |
1028 | if (ruleno < 0) { | 1050 | if (ruleno < 0) { |
1029 | pr_err("no crush rule pool %d type %d size %d\n", | 1051 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", |
1030 | poolid, pool->v.type, pool->v.size); | 1052 | poolid, pool->v.crush_ruleset, pool->v.type, |
1053 | pool->v.size); | ||
1031 | return NULL; | 1054 | return NULL; |
1032 | } | 1055 | } |
1033 | 1056 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 8fcc023056c7..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef __RADOS_H | 1 | #ifndef CEPH_RADOS_H |
2 | #define __RADOS_H | 2 | #define CEPH_RADOS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Data types for the Ceph distributed object storage layer RADOS | 5 | * Data types for the Ceph distributed object storage layer RADOS |
@@ -203,6 +203,7 @@ enum { | |||
203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, | 203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, |
204 | 204 | ||
205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, | 205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, |
206 | CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, | ||
206 | 207 | ||
207 | /** attrs **/ | 208 | /** attrs **/ |
208 | /* read */ | 209 | /* read */ |
@@ -272,6 +273,10 @@ static inline int ceph_osd_op_mode_modify(int op) | |||
272 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | 273 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; |
273 | } | 274 | } |
274 | 275 | ||
276 | /* | ||
277 | * note that the following tmap stuff is also defined in the ceph librados.h | ||
278 | * any modification here needs to be updated there | ||
279 | */ | ||
275 | #define CEPH_OSD_TMAP_HDR 'h' | 280 | #define CEPH_OSD_TMAP_HDR 'h' |
276 | #define CEPH_OSD_TMAP_SET 's' | 281 | #define CEPH_OSD_TMAP_SET 's' |
277 | #define CEPH_OSD_TMAP_RM 'r' | 282 | #define CEPH_OSD_TMAP_RM 'r' |
@@ -297,6 +302,7 @@ enum { | |||
297 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | 302 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ |
298 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | 303 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ |
299 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | 304 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ |
305 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | ||
300 | }; | 306 | }; |
301 | 307 | ||
302 | enum { | 308 | enum { |
@@ -350,6 +356,9 @@ struct ceph_osd_op { | |||
350 | struct { | 356 | struct { |
351 | __le64 cookie, count; | 357 | __le64 cookie, count; |
352 | } __attribute__ ((packed)) pgls; | 358 | } __attribute__ ((packed)) pgls; |
359 | struct { | ||
360 | __le64 snapid; | ||
361 | } __attribute__ ((packed)) snap; | ||
353 | }; | 362 | }; |
354 | __le32 payload_len; | 363 | __le32 payload_len; |
355 | } __attribute__ ((packed)); | 364 | } __attribute__ ((packed)); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fa87f51e38e1..9922628532b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include "ceph_debug.h" | 2 | #include "ceph_debug.h" |
3 | 3 | ||
4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
5 | #include <linux/ctype.h> | ||
5 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
6 | #include <linux/inet.h> | 7 | #include <linux/inet.h> |
7 | #include <linux/in6.h> | 8 | #include <linux/in6.h> |
@@ -101,12 +102,21 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
101 | } | 102 | } |
102 | 103 | ||
103 | 104 | ||
104 | static int ceph_syncfs(struct super_block *sb, int wait) | 105 | static int ceph_sync_fs(struct super_block *sb, int wait) |
105 | { | 106 | { |
106 | dout("sync_fs %d\n", wait); | 107 | struct ceph_client *client = ceph_sb_to_client(sb); |
108 | |||
109 | if (!wait) { | ||
110 | dout("sync_fs (non-blocking)\n"); | ||
111 | ceph_flush_dirty_caps(&client->mdsc); | ||
112 | dout("sync_fs (non-blocking) done\n"); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | dout("sync_fs (blocking)\n"); | ||
107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
109 | dout("sync_fs %d done\n", wait); | 119 | dout("sync_fs (blocking) done\n"); |
110 | return 0; | 120 | return 0; |
111 | } | 121 | } |
112 | 122 | ||
@@ -150,9 +160,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
150 | struct ceph_mount_args *args = client->mount_args; | 160 | struct ceph_mount_args *args = client->mount_args; |
151 | 161 | ||
152 | if (args->flags & CEPH_OPT_FSID) | 162 | if (args->flags & CEPH_OPT_FSID) |
153 | seq_printf(m, ",fsidmajor=%llu,fsidminor%llu", | 163 | seq_printf(m, ",fsid=%pU", &args->fsid); |
154 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]), | ||
155 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[8])); | ||
156 | if (args->flags & CEPH_OPT_NOSHARE) | 164 | if (args->flags & CEPH_OPT_NOSHARE) |
157 | seq_puts(m, ",noshare"); | 165 | seq_puts(m, ",noshare"); |
158 | if (args->flags & CEPH_OPT_DIRSTAT) | 166 | if (args->flags & CEPH_OPT_DIRSTAT) |
@@ -279,7 +287,7 @@ static const struct super_operations ceph_super_ops = { | |||
279 | .alloc_inode = ceph_alloc_inode, | 287 | .alloc_inode = ceph_alloc_inode, |
280 | .destroy_inode = ceph_destroy_inode, | 288 | .destroy_inode = ceph_destroy_inode, |
281 | .write_inode = ceph_write_inode, | 289 | .write_inode = ceph_write_inode, |
282 | .sync_fs = ceph_syncfs, | 290 | .sync_fs = ceph_sync_fs, |
283 | .put_super = ceph_put_super, | 291 | .put_super = ceph_put_super, |
284 | .show_options = ceph_show_options, | 292 | .show_options = ceph_show_options, |
285 | .statfs = ceph_statfs, | 293 | .statfs = ceph_statfs, |
@@ -322,9 +330,6 @@ const char *ceph_msg_type_name(int type) | |||
322 | * mount options | 330 | * mount options |
323 | */ | 331 | */ |
324 | enum { | 332 | enum { |
325 | Opt_fsidmajor, | ||
326 | Opt_fsidminor, | ||
327 | Opt_monport, | ||
328 | Opt_wsize, | 333 | Opt_wsize, |
329 | Opt_rsize, | 334 | Opt_rsize, |
330 | Opt_osdtimeout, | 335 | Opt_osdtimeout, |
@@ -339,6 +344,7 @@ enum { | |||
339 | Opt_congestion_kb, | 344 | Opt_congestion_kb, |
340 | Opt_last_int, | 345 | Opt_last_int, |
341 | /* int args above */ | 346 | /* int args above */ |
347 | Opt_fsid, | ||
342 | Opt_snapdirname, | 348 | Opt_snapdirname, |
343 | Opt_name, | 349 | Opt_name, |
344 | Opt_secret, | 350 | Opt_secret, |
@@ -355,9 +361,6 @@ enum { | |||
355 | }; | 361 | }; |
356 | 362 | ||
357 | static match_table_t arg_tokens = { | 363 | static match_table_t arg_tokens = { |
358 | {Opt_fsidmajor, "fsidmajor=%ld"}, | ||
359 | {Opt_fsidminor, "fsidminor=%ld"}, | ||
360 | {Opt_monport, "monport=%d"}, | ||
361 | {Opt_wsize, "wsize=%d"}, | 364 | {Opt_wsize, "wsize=%d"}, |
362 | {Opt_rsize, "rsize=%d"}, | 365 | {Opt_rsize, "rsize=%d"}, |
363 | {Opt_osdtimeout, "osdtimeout=%d"}, | 366 | {Opt_osdtimeout, "osdtimeout=%d"}, |
@@ -371,6 +374,7 @@ static match_table_t arg_tokens = { | |||
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
373 | /* int args above */ | 376 | /* int args above */ |
377 | {Opt_fsid, "fsid=%s"}, | ||
374 | {Opt_snapdirname, "snapdirname=%s"}, | 378 | {Opt_snapdirname, "snapdirname=%s"}, |
375 | {Opt_name, "name=%s"}, | 379 | {Opt_name, "name=%s"}, |
376 | {Opt_secret, "secret=%s"}, | 380 | {Opt_secret, "secret=%s"}, |
@@ -386,6 +390,36 @@ static match_table_t arg_tokens = { | |||
386 | {-1, NULL} | 390 | {-1, NULL} |
387 | }; | 391 | }; |
388 | 392 | ||
393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
394 | { | ||
395 | int i = 0; | ||
396 | char tmp[3]; | ||
397 | int err = -EINVAL; | ||
398 | int d; | ||
399 | |||
400 | dout("parse_fsid '%s'\n", str); | ||
401 | tmp[2] = 0; | ||
402 | while (*str && i < 16) { | ||
403 | if (ispunct(*str)) { | ||
404 | str++; | ||
405 | continue; | ||
406 | } | ||
407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
408 | break; | ||
409 | tmp[0] = str[0]; | ||
410 | tmp[1] = str[1]; | ||
411 | if (sscanf(tmp, "%x", &d) < 1) | ||
412 | break; | ||
413 | fsid->fsid[i] = d & 0xff; | ||
414 | i++; | ||
415 | str += 2; | ||
416 | } | ||
417 | |||
418 | if (i == 16) | ||
419 | err = 0; | ||
420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
421 | return err; | ||
422 | } | ||
389 | 423 | ||
390 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, |
391 | const char *dev_name, | 425 | const char *dev_name, |
@@ -469,12 +503,6 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
469 | dout("got token %d\n", token); | 503 | dout("got token %d\n", token); |
470 | } | 504 | } |
471 | switch (token) { | 505 | switch (token) { |
472 | case Opt_fsidmajor: | ||
473 | *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval); | ||
474 | break; | ||
475 | case Opt_fsidminor: | ||
476 | *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval); | ||
477 | break; | ||
478 | case Opt_ip: | 506 | case Opt_ip: |
479 | err = ceph_parse_ips(argstr[0].from, | 507 | err = ceph_parse_ips(argstr[0].from, |
480 | argstr[0].to, | 508 | argstr[0].to, |
@@ -485,6 +513,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
485 | args->flags |= CEPH_OPT_MYIP; | 513 | args->flags |= CEPH_OPT_MYIP; |
486 | break; | 514 | break; |
487 | 515 | ||
516 | case Opt_fsid: | ||
517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
518 | if (err == 0) | ||
519 | args->flags |= CEPH_OPT_FSID; | ||
520 | break; | ||
488 | case Opt_snapdirname: | 521 | case Opt_snapdirname: |
489 | kfree(args->snapdir_name); | 522 | kfree(args->snapdir_name); |
490 | args->snapdir_name = kstrndup(argstr[0].from, | 523 | args->snapdir_name = kstrndup(argstr[0].from, |
@@ -515,6 +548,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
515 | case Opt_osdkeepalivetimeout: | 548 | case Opt_osdkeepalivetimeout: |
516 | args->osd_keepalive_timeout = intval; | 549 | args->osd_keepalive_timeout = intval; |
517 | break; | 550 | break; |
551 | case Opt_osd_idle_ttl: | ||
552 | args->osd_idle_ttl = intval; | ||
553 | break; | ||
518 | case Opt_mount_timeout: | 554 | case Opt_mount_timeout: |
519 | args->mount_timeout = intval; | 555 | args->mount_timeout = intval; |
520 | break; | 556 | break; |
@@ -630,7 +666,6 @@ static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | |||
630 | 666 | ||
631 | /* caps */ | 667 | /* caps */ |
632 | client->min_caps = args->max_readdir; | 668 | client->min_caps = args->max_readdir; |
633 | ceph_adjust_min_caps(client->min_caps); | ||
634 | 669 | ||
635 | /* subsystems */ | 670 | /* subsystems */ |
636 | err = ceph_monc_init(&client->monc, client); | 671 | err = ceph_monc_init(&client->monc, client); |
@@ -680,8 +715,6 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
680 | 715 | ||
681 | ceph_monc_stop(&client->monc); | 716 | ceph_monc_stop(&client->monc); |
682 | 717 | ||
683 | ceph_adjust_min_caps(-client->min_caps); | ||
684 | |||
685 | ceph_debugfs_client_cleanup(client); | 718 | ceph_debugfs_client_cleanup(client); |
686 | destroy_workqueue(client->wb_wq); | 719 | destroy_workqueue(client->wb_wq); |
687 | destroy_workqueue(client->pg_inv_wq); | 720 | destroy_workqueue(client->pg_inv_wq); |
@@ -706,13 +739,13 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
706 | { | 739 | { |
707 | if (client->have_fsid) { | 740 | if (client->have_fsid) { |
708 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 741 | if (ceph_fsid_compare(&client->fsid, fsid)) { |
709 | pr_err("bad fsid, had " FSID_FORMAT " got " FSID_FORMAT, | 742 | pr_err("bad fsid, had %pU got %pU", |
710 | PR_FSID(&client->fsid), PR_FSID(fsid)); | 743 | &client->fsid, fsid); |
711 | return -1; | 744 | return -1; |
712 | } | 745 | } |
713 | } else { | 746 | } else { |
714 | pr_info("client%lld fsid " FSID_FORMAT "\n", | 747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, |
715 | client->monc.auth->global_id, PR_FSID(fsid)); | 748 | fsid); |
716 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); |
717 | ceph_debugfs_client_init(client); | 750 | ceph_debugfs_client_init(client); |
718 | client->have_fsid = true; | 751 | client->have_fsid = true; |
@@ -1043,8 +1076,6 @@ static int __init init_ceph(void) | |||
1043 | if (ret) | 1076 | if (ret) |
1044 | goto out_msgr; | 1077 | goto out_msgr; |
1045 | 1078 | ||
1046 | ceph_caps_init(); | ||
1047 | |||
1048 | ret = register_filesystem(&ceph_fs_type); | 1079 | ret = register_filesystem(&ceph_fs_type); |
1049 | if (ret) | 1080 | if (ret) |
1050 | goto out_icache; | 1081 | goto out_icache; |
@@ -1069,7 +1100,6 @@ static void __exit exit_ceph(void) | |||
1069 | { | 1100 | { |
1070 | dout("exit_ceph\n"); | 1101 | dout("exit_ceph\n"); |
1071 | unregister_filesystem(&ceph_fs_type); | 1102 | unregister_filesystem(&ceph_fs_type); |
1072 | ceph_caps_finalize(); | ||
1073 | destroy_caches(); | 1103 | destroy_caches(); |
1074 | ceph_msgr_exit(); | 1104 | ceph_msgr_exit(); |
1075 | ceph_debugfs_cleanup(); | 1105 | ceph_debugfs_cleanup(); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 10a4a406e887..2482d696f0de 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -31,6 +31,12 @@ | |||
31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * Supported features | ||
35 | */ | ||
36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | ||
37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
38 | |||
39 | /* | ||
34 | * mount options | 40 | * mount options |
35 | */ | 41 | */ |
36 | #define CEPH_OPT_FSID (1<<0) | 42 | #define CEPH_OPT_FSID (1<<0) |
@@ -560,11 +566,13 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) | |||
560 | /* what the mds thinks we want */ | 566 | /* what the mds thinks we want */ |
561 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); | 567 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); |
562 | 568 | ||
563 | extern void ceph_caps_init(void); | 569 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); |
564 | extern void ceph_caps_finalize(void); | 570 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); |
565 | extern void ceph_adjust_min_caps(int delta); | 571 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); |
566 | extern int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need); | 572 | extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
567 | extern int ceph_unreserve_caps(struct ceph_cap_reservation *ctx); | 573 | struct ceph_cap_reservation *ctx, int need); |
574 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | ||
575 | struct ceph_cap_reservation *ctx); | ||
568 | extern void ceph_reservation_status(struct ceph_client *client, | 576 | extern void ceph_reservation_status(struct ceph_client *client, |
569 | int *total, int *avail, int *used, | 577 | int *total, int *avail, int *used, |
570 | int *reserved, int *min); | 578 | int *reserved, int *min); |
@@ -738,13 +746,6 @@ extern struct kmem_cache *ceph_file_cachep; | |||
738 | extern const char *ceph_msg_type_name(int type); | 746 | extern const char *ceph_msg_type_name(int type); |
739 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | 747 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); |
740 | 748 | ||
741 | #define FSID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \ | ||
742 | "%02x%02x%02x%02x%02x%02x" | ||
743 | #define PR_FSID(f) (f)->fsid[0], (f)->fsid[1], (f)->fsid[2], (f)->fsid[3], \ | ||
744 | (f)->fsid[4], (f)->fsid[5], (f)->fsid[6], (f)->fsid[7], \ | ||
745 | (f)->fsid[8], (f)->fsid[9], (f)->fsid[10], (f)->fsid[11], \ | ||
746 | (f)->fsid[12], (f)->fsid[13], (f)->fsid[14], (f)->fsid[15] | ||
747 | |||
748 | /* inode.c */ | 749 | /* inode.c */ |
749 | extern const struct inode_operations ceph_file_iops; | 750 | extern const struct inode_operations ceph_file_iops; |
750 | 751 | ||
@@ -806,13 +807,16 @@ static inline void ceph_remove_cap(struct ceph_cap *cap) | |||
806 | __ceph_remove_cap(cap); | 807 | __ceph_remove_cap(cap); |
807 | spin_unlock(&inode->i_lock); | 808 | spin_unlock(&inode->i_lock); |
808 | } | 809 | } |
809 | extern void ceph_put_cap(struct ceph_cap *cap); | 810 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
811 | struct ceph_cap *cap); | ||
810 | 812 | ||
811 | extern void ceph_queue_caps_release(struct inode *inode); | 813 | extern void ceph_queue_caps_release(struct inode *inode); |
812 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); | 814 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); |
813 | extern int ceph_fsync(struct file *file, int datasync); | 815 | extern int ceph_fsync(struct file *file, int datasync); |
814 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | 816 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, |
815 | struct ceph_mds_session *session); | 817 | struct ceph_mds_session *session); |
818 | extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, | ||
819 | int mds); | ||
816 | extern int ceph_get_cap_mds(struct inode *inode); | 820 | extern int ceph_get_cap_mds(struct inode *inode); |
817 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); | 821 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); |
818 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | 822 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); |
@@ -857,7 +861,7 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages); | |||
857 | /* dir.c */ | 861 | /* dir.c */ |
858 | extern const struct file_operations ceph_dir_fops; | 862 | extern const struct file_operations ceph_dir_fops; |
859 | extern const struct inode_operations ceph_dir_iops; | 863 | extern const struct inode_operations ceph_dir_iops; |
860 | extern struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, | 864 | extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, |
861 | ceph_snapdir_dentry_ops; | 865 | ceph_snapdir_dentry_ops; |
862 | 866 | ||
863 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); | 867 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); |
@@ -888,6 +892,14 @@ extern void ceph_debugfs_cleanup(void); | |||
888 | extern int ceph_debugfs_client_init(struct ceph_client *client); | 892 | extern int ceph_debugfs_client_init(struct ceph_client *client); |
889 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | 893 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); |
890 | 894 | ||
895 | /* locks.c */ | ||
896 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | ||
897 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | ||
898 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); | ||
899 | extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, | ||
900 | int p_locks, int f_locks); | ||
901 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); | ||
902 | |||
891 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | 903 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) |
892 | { | 904 | { |
893 | if (dentry && dentry->d_parent) | 905 | if (dentry && dentry->d_parent) |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 68aeebc69681..097a2654c00f 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -337,6 +337,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | static int __build_xattrs(struct inode *inode) | 339 | static int __build_xattrs(struct inode *inode) |
340 | __releases(inode->i_lock) | ||
341 | __acquires(inode->i_lock) | ||
340 | { | 342 | { |
341 | u32 namelen; | 343 | u32 namelen; |
342 | u32 numattr = 0; | 344 | u32 numattr = 0; |