diff options
author | Yehuda Sadeh <yehuda@hq.newdream.net> | 2010-04-06 18:14:15 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-10-20 18:37:28 -0400 |
commit | 3d14c5d2b6e15c21d8e5467dc62d33127c23a644 (patch) | |
tree | 7d123c47847df9d1e865b6b78dc7da3fe739b704 /fs/ceph/file.c | |
parent | ae1533b62b3369e6ae32338f4a77d64d0e88f676 (diff) |
ceph: factor out libceph from Ceph file system
This factors out protocol and low-level storage parts of ceph into a
separate libceph module living in net/ceph and include/linux/ceph. This
is mostly a matter of moving files around. However, a few key pieces
of the interface change as well:
- ceph_client becomes ceph_fs_client and ceph_client, where the latter
captures the mon and osd clients, and the fs_client gets the mds client
and file system specific pieces.
- Mount option parsing and debugfs setup is correspondingly broken into
two pieces.
- The mon client gets a generic handler callback for otherwise unknown
messages (mds map, in this case).
- The basic supported/required feature bits can be expanded (and are by
ceph_fs_client).
No functional change, aside from some subtle error handling cases that got
cleaned up in the refactoring process.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 207 |
1 files changed, 26 insertions, 181 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66e4da6dba22..e77c28cf3690 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | ||
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
@@ -38,8 +39,8 @@ | |||
38 | static struct ceph_mds_request * | 39 | static struct ceph_mds_request * |
39 | prepare_open_request(struct super_block *sb, int flags, int create_mode) | 40 | prepare_open_request(struct super_block *sb, int flags, int create_mode) |
40 | { | 41 | { |
41 | struct ceph_client *client = ceph_sb_to_client(sb); | 42 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
42 | struct ceph_mds_client *mdsc = &client->mdsc; | 43 | struct ceph_mds_client *mdsc = fsc->mdsc; |
43 | struct ceph_mds_request *req; | 44 | struct ceph_mds_request *req; |
44 | int want_auth = USE_ANY_MDS; | 45 | int want_auth = USE_ANY_MDS; |
45 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 46 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; |
@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
117 | int ceph_open(struct inode *inode, struct file *file) | 118 | int ceph_open(struct inode *inode, struct file *file) |
118 | { | 119 | { |
119 | struct ceph_inode_info *ci = ceph_inode(inode); | 120 | struct ceph_inode_info *ci = ceph_inode(inode); |
120 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 121 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
121 | struct ceph_mds_client *mdsc = &client->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
122 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
123 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
124 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
@@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
216 | struct nameidata *nd, int mode, | 217 | struct nameidata *nd, int mode, |
217 | int locked_dir) | 218 | int locked_dir) |
218 | { | 219 | { |
219 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 220 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
220 | struct ceph_mds_client *mdsc = &client->mdsc; | 221 | struct ceph_mds_client *mdsc = fsc->mdsc; |
221 | struct file *file = nd->intent.open.file; | 222 | struct file *file = nd->intent.open.file; |
222 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 223 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); |
223 | struct ceph_mds_request *req; | 224 | struct ceph_mds_request *req; |
@@ -270,163 +271,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||
270 | } | 271 | } |
271 | 272 | ||
272 | /* | 273 | /* |
273 | * build a vector of user pages | ||
274 | */ | ||
275 | static struct page **get_direct_page_vector(const char __user *data, | ||
276 | int num_pages, | ||
277 | loff_t off, size_t len) | ||
278 | { | ||
279 | struct page **pages; | ||
280 | int rc; | ||
281 | |||
282 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
283 | if (!pages) | ||
284 | return ERR_PTR(-ENOMEM); | ||
285 | |||
286 | down_read(¤t->mm->mmap_sem); | ||
287 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
288 | num_pages, 0, 0, pages, NULL); | ||
289 | up_read(¤t->mm->mmap_sem); | ||
290 | if (rc < 0) | ||
291 | goto fail; | ||
292 | return pages; | ||
293 | |||
294 | fail: | ||
295 | kfree(pages); | ||
296 | return ERR_PTR(rc); | ||
297 | } | ||
298 | |||
299 | static void put_page_vector(struct page **pages, int num_pages) | ||
300 | { | ||
301 | int i; | ||
302 | |||
303 | for (i = 0; i < num_pages; i++) | ||
304 | put_page(pages[i]); | ||
305 | kfree(pages); | ||
306 | } | ||
307 | |||
308 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
309 | { | ||
310 | int i; | ||
311 | |||
312 | for (i = 0; i < num_pages; i++) | ||
313 | __free_pages(pages[i], 0); | ||
314 | kfree(pages); | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * allocate a vector new pages | ||
319 | */ | ||
320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
321 | { | ||
322 | struct page **pages; | ||
323 | int i; | ||
324 | |||
325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
326 | if (!pages) | ||
327 | return ERR_PTR(-ENOMEM); | ||
328 | for (i = 0; i < num_pages; i++) { | ||
329 | pages[i] = __page_cache_alloc(flags); | ||
330 | if (pages[i] == NULL) { | ||
331 | ceph_release_page_vector(pages, i); | ||
332 | return ERR_PTR(-ENOMEM); | ||
333 | } | ||
334 | } | ||
335 | return pages; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * copy user data into a page vector | ||
340 | */ | ||
341 | static int copy_user_to_page_vector(struct page **pages, | ||
342 | const char __user *data, | ||
343 | loff_t off, size_t len) | ||
344 | { | ||
345 | int i = 0; | ||
346 | int po = off & ~PAGE_CACHE_MASK; | ||
347 | int left = len; | ||
348 | int l, bad; | ||
349 | |||
350 | while (left > 0) { | ||
351 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
352 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
353 | if (bad == l) | ||
354 | return -EFAULT; | ||
355 | data += l - bad; | ||
356 | left -= l - bad; | ||
357 | po += l - bad; | ||
358 | if (po == PAGE_CACHE_SIZE) { | ||
359 | po = 0; | ||
360 | i++; | ||
361 | } | ||
362 | } | ||
363 | return len; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * copy user data from a page vector into a user pointer | ||
368 | */ | ||
369 | static int copy_page_vector_to_user(struct page **pages, char __user *data, | ||
370 | loff_t off, size_t len) | ||
371 | { | ||
372 | int i = 0; | ||
373 | int po = off & ~PAGE_CACHE_MASK; | ||
374 | int left = len; | ||
375 | int l, bad; | ||
376 | |||
377 | while (left > 0) { | ||
378 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
379 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
380 | if (bad == l) | ||
381 | return -EFAULT; | ||
382 | data += l - bad; | ||
383 | left -= l - bad; | ||
384 | if (po) { | ||
385 | po += l - bad; | ||
386 | if (po == PAGE_CACHE_SIZE) | ||
387 | po = 0; | ||
388 | } | ||
389 | i++; | ||
390 | } | ||
391 | return len; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Zero an extent within a page vector. Offset is relative to the | ||
396 | * start of the first page. | ||
397 | */ | ||
398 | static void zero_page_vector_range(int off, int len, struct page **pages) | ||
399 | { | ||
400 | int i = off >> PAGE_CACHE_SHIFT; | ||
401 | |||
402 | off &= ~PAGE_CACHE_MASK; | ||
403 | |||
404 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
405 | |||
406 | /* leading partial page? */ | ||
407 | if (off) { | ||
408 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
409 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
410 | (int)off); | ||
411 | zero_user_segment(pages[i], off, end); | ||
412 | len -= (end - off); | ||
413 | i++; | ||
414 | } | ||
415 | while (len >= PAGE_CACHE_SIZE) { | ||
416 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
417 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
418 | len -= PAGE_CACHE_SIZE; | ||
419 | i++; | ||
420 | } | ||
421 | /* trailing partial page? */ | ||
422 | if (len) { | ||
423 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
424 | zero_user_segment(pages[i], 0, len); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | |||
429 | /* | ||
430 | * Read a range of bytes striped over one or more objects. Iterate over | 274 | * Read a range of bytes striped over one or more objects. Iterate over |
431 | * objects we stripe over. (That's not atomic, but good enough for now.) | 275 | * objects we stripe over. (That's not atomic, but good enough for now.) |
432 | * | 276 | * |
@@ -438,7 +282,7 @@ static int striped_read(struct inode *inode, | |||
438 | struct page **pages, int num_pages, | 282 | struct page **pages, int num_pages, |
439 | int *checkeof) | 283 | int *checkeof) |
440 | { | 284 | { |
441 | struct ceph_client *client = ceph_inode_to_client(inode); | 285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
442 | struct ceph_inode_info *ci = ceph_inode(inode); | 286 | struct ceph_inode_info *ci = ceph_inode(inode); |
443 | u64 pos, this_len; | 287 | u64 pos, this_len; |
444 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
@@ -459,7 +303,7 @@ static int striped_read(struct inode *inode, | |||
459 | 303 | ||
460 | more: | 304 | more: |
461 | this_len = left; | 305 | this_len = left; |
462 | ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
463 | &ci->i_layout, pos, &this_len, | 307 | &ci->i_layout, pos, &this_len, |
464 | ci->i_truncate_seq, | 308 | ci->i_truncate_seq, |
465 | ci->i_truncate_size, | 309 | ci->i_truncate_size, |
@@ -477,8 +321,8 @@ more: | |||
477 | 321 | ||
478 | if (read < pos - off) { | 322 | if (read < pos - off) { |
479 | dout(" zero gap %llu to %llu\n", off + read, pos); | 323 | dout(" zero gap %llu to %llu\n", off + read, pos); |
480 | zero_page_vector_range(page_off + read, | 324 | ceph_zero_page_vector_range(page_off + read, |
481 | pos - off - read, pages); | 325 | pos - off - read, pages); |
482 | } | 326 | } |
483 | pos += ret; | 327 | pos += ret; |
484 | read = pos - off; | 328 | read = pos - off; |
@@ -495,8 +339,8 @@ more: | |||
495 | /* was original extent fully inside i_size? */ | 339 | /* was original extent fully inside i_size? */ |
496 | if (pos + left <= inode->i_size) { | 340 | if (pos + left <= inode->i_size) { |
497 | dout("zero tail\n"); | 341 | dout("zero tail\n"); |
498 | zero_page_vector_range(page_off + read, len - read, | 342 | ceph_zero_page_vector_range(page_off + read, len - read, |
499 | pages); | 343 | pages); |
500 | read = len; | 344 | read = len; |
501 | goto out; | 345 | goto out; |
502 | } | 346 | } |
@@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
531 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
532 | 376 | ||
533 | if (file->f_flags & O_DIRECT) { | 377 | if (file->f_flags & O_DIRECT) { |
534 | pages = get_direct_page_vector(data, num_pages, off, len); | 378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); |
535 | 379 | ||
536 | /* | 380 | /* |
537 | * flush any page cache pages in this range. this | 381 | * flush any page cache pages in this range. this |
@@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
552 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); |
553 | 397 | ||
554 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
555 | ret = copy_page_vector_to_user(pages, data, off, ret); | 399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
556 | if (ret >= 0) | 400 | if (ret >= 0) |
557 | *poff = off + ret; | 401 | *poff = off + ret; |
558 | 402 | ||
559 | done: | 403 | done: |
560 | if (file->f_flags & O_DIRECT) | 404 | if (file->f_flags & O_DIRECT) |
561 | put_page_vector(pages, num_pages); | 405 | ceph_put_page_vector(pages, num_pages); |
562 | else | 406 | else |
563 | ceph_release_page_vector(pages, num_pages); | 407 | ceph_release_page_vector(pages, num_pages); |
564 | dout("sync_read result %d\n", ret); | 408 | dout("sync_read result %d\n", ret); |
@@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
594 | { | 438 | { |
595 | struct inode *inode = file->f_dentry->d_inode; | 439 | struct inode *inode = file->f_dentry->d_inode; |
596 | struct ceph_inode_info *ci = ceph_inode(inode); | 440 | struct ceph_inode_info *ci = ceph_inode(inode); |
597 | struct ceph_client *client = ceph_inode_to_client(inode); | 441 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
598 | struct ceph_osd_request *req; | 442 | struct ceph_osd_request *req; |
599 | struct page **pages; | 443 | struct page **pages; |
600 | int num_pages; | 444 | int num_pages; |
@@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
642 | */ | 486 | */ |
643 | more: | 487 | more: |
644 | len = left; | 488 | len = left; |
645 | req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
646 | ceph_vino(inode), pos, &len, | 490 | ceph_vino(inode), pos, &len, |
647 | CEPH_OSD_OP_WRITE, flags, | 491 | CEPH_OSD_OP_WRITE, flags, |
648 | ci->i_snap_realm->cached_context, | 492 | ci->i_snap_realm->cached_context, |
@@ -655,7 +499,7 @@ more: | |||
655 | num_pages = calc_pages_for(pos, len); | 499 | num_pages = calc_pages_for(pos, len); |
656 | 500 | ||
657 | if (file->f_flags & O_DIRECT) { | 501 | if (file->f_flags & O_DIRECT) { |
658 | pages = get_direct_page_vector(data, num_pages, pos, len); | 502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); |
659 | if (IS_ERR(pages)) { | 503 | if (IS_ERR(pages)) { |
660 | ret = PTR_ERR(pages); | 504 | ret = PTR_ERR(pages); |
661 | goto out; | 505 | goto out; |
@@ -673,7 +517,7 @@ more: | |||
673 | ret = PTR_ERR(pages); | 517 | ret = PTR_ERR(pages); |
674 | goto out; | 518 | goto out; |
675 | } | 519 | } |
676 | ret = copy_user_to_page_vector(pages, data, pos, len); | 520 | ret = ceph_copy_user_to_page_vector(pages, data, pos, len); |
677 | if (ret < 0) { | 521 | if (ret < 0) { |
678 | ceph_release_page_vector(pages, num_pages); | 522 | ceph_release_page_vector(pages, num_pages); |
679 | goto out; | 523 | goto out; |
@@ -689,7 +533,7 @@ more: | |||
689 | req->r_num_pages = num_pages; | 533 | req->r_num_pages = num_pages; |
690 | req->r_inode = inode; | 534 | req->r_inode = inode; |
691 | 535 | ||
692 | ret = ceph_osdc_start_request(&client->osdc, req, false); | 536 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
693 | if (!ret) { | 537 | if (!ret) { |
694 | if (req->r_safe_callback) { | 538 | if (req->r_safe_callback) { |
695 | /* | 539 | /* |
@@ -701,11 +545,11 @@ more: | |||
701 | spin_unlock(&ci->i_unsafe_lock); | 545 | spin_unlock(&ci->i_unsafe_lock); |
702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 546 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
703 | } | 547 | } |
704 | ret = ceph_osdc_wait_request(&client->osdc, req); | 548 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); |
705 | } | 549 | } |
706 | 550 | ||
707 | if (file->f_flags & O_DIRECT) | 551 | if (file->f_flags & O_DIRECT) |
708 | put_page_vector(pages, num_pages); | 552 | ceph_put_page_vector(pages, num_pages); |
709 | else if (file->f_flags & O_SYNC) | 553 | else if (file->f_flags & O_SYNC) |
710 | ceph_release_page_vector(pages, num_pages); | 554 | ceph_release_page_vector(pages, num_pages); |
711 | 555 | ||
@@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
814 | struct ceph_file_info *fi = file->private_data; | 658 | struct ceph_file_info *fi = file->private_data; |
815 | struct inode *inode = file->f_dentry->d_inode; | 659 | struct inode *inode = file->f_dentry->d_inode; |
816 | struct ceph_inode_info *ci = ceph_inode(inode); | 660 | struct ceph_inode_info *ci = ceph_inode(inode); |
817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 661 | struct ceph_osd_client *osdc = |
662 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
818 | loff_t endoff = pos + iov->iov_len; | 663 | loff_t endoff = pos + iov->iov_len; |
819 | int want, got = 0; | 664 | int want, got = 0; |
820 | int ret, err; | 665 | int ret, err; |