diff options
| -rw-r--r-- | MAINTAINERS | 7 | ||||
| -rw-r--r-- | drivers/block/rbd.c | 193 | ||||
| -rw-r--r-- | fs/ceph/acl.c | 14 | ||||
| -rw-r--r-- | fs/ceph/addr.c | 19 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 127 | ||||
| -rw-r--r-- | fs/ceph/dir.c | 33 | ||||
| -rw-r--r-- | fs/ceph/file.c | 37 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 41 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 127 | ||||
| -rw-r--r-- | fs/ceph/mds_client.h | 2 | ||||
| -rw-r--r-- | fs/ceph/snap.c | 54 | ||||
| -rw-r--r-- | fs/ceph/super.c | 4 | ||||
| -rw-r--r-- | fs/ceph/super.h | 5 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_fs.h | 37 | ||||
| -rw-r--r-- | include/linux/ceph/libceph.h | 3 | ||||
| -rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
| -rw-r--r-- | include/linux/ceph/mon_client.h | 9 | ||||
| -rw-r--r-- | net/ceph/ceph_common.c | 16 | ||||
| -rw-r--r-- | net/ceph/ceph_strings.c | 14 | ||||
| -rw-r--r-- | net/ceph/debugfs.c | 2 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 14 | ||||
| -rw-r--r-- | net/ceph/mon_client.c | 139 | ||||
| -rw-r--r-- | net/ceph/osd_client.c | 31 |
23 files changed, 444 insertions, 488 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 1921ed58d1a0..7cfcee4e2bea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -2433,7 +2433,8 @@ F: arch/powerpc/oprofile/*cell* | |||
| 2433 | F: arch/powerpc/platforms/cell/ | 2433 | F: arch/powerpc/platforms/cell/ |
| 2434 | 2434 | ||
| 2435 | CEPH DISTRIBUTED FILE SYSTEM CLIENT | 2435 | CEPH DISTRIBUTED FILE SYSTEM CLIENT |
| 2436 | M: Sage Weil <sage@inktank.com> | 2436 | M: Yan, Zheng <zyan@redhat.com> |
| 2437 | M: Sage Weil <sage@redhat.com> | ||
| 2437 | L: ceph-devel@vger.kernel.org | 2438 | L: ceph-devel@vger.kernel.org |
| 2438 | W: http://ceph.com/ | 2439 | W: http://ceph.com/ |
| 2439 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | 2440 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git |
| @@ -7998,8 +7999,8 @@ S: Supported | |||
| 7998 | F: drivers/net/wireless/ath/wcn36xx/ | 7999 | F: drivers/net/wireless/ath/wcn36xx/ |
| 7999 | 8000 | ||
| 8000 | RADOS BLOCK DEVICE (RBD) | 8001 | RADOS BLOCK DEVICE (RBD) |
| 8001 | M: Yehuda Sadeh <yehuda@inktank.com> | 8002 | M: Ilya Dryomov <idryomov@gmail.com> |
| 8002 | M: Sage Weil <sage@inktank.com> | 8003 | M: Sage Weil <sage@redhat.com> |
| 8003 | M: Alex Elder <elder@kernel.org> | 8004 | M: Alex Elder <elder@kernel.org> |
| 8004 | M: ceph-devel@vger.kernel.org | 8005 | M: ceph-devel@vger.kernel.org |
| 8005 | W: http://ceph.com/ | 8006 | W: http://ceph.com/ |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8a86b62466f7..b40af3203089 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
| 39 | #include <linux/device.h> | 39 | #include <linux/device.h> |
| 40 | #include <linux/module.h> | 40 | #include <linux/module.h> |
| 41 | #include <linux/blk-mq.h> | ||
| 41 | #include <linux/fs.h> | 42 | #include <linux/fs.h> |
| 42 | #include <linux/blkdev.h> | 43 | #include <linux/blkdev.h> |
| 43 | #include <linux/slab.h> | 44 | #include <linux/slab.h> |
| @@ -340,9 +341,7 @@ struct rbd_device { | |||
| 340 | 341 | ||
| 341 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | 342 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ |
| 342 | 343 | ||
| 343 | struct list_head rq_queue; /* incoming rq queue */ | ||
| 344 | spinlock_t lock; /* queue, flags, open_count */ | 344 | spinlock_t lock; /* queue, flags, open_count */ |
| 345 | struct work_struct rq_work; | ||
| 346 | 345 | ||
| 347 | struct rbd_image_header header; | 346 | struct rbd_image_header header; |
| 348 | unsigned long flags; /* possibly lock protected */ | 347 | unsigned long flags; /* possibly lock protected */ |
| @@ -360,6 +359,9 @@ struct rbd_device { | |||
| 360 | atomic_t parent_ref; | 359 | atomic_t parent_ref; |
| 361 | struct rbd_device *parent; | 360 | struct rbd_device *parent; |
| 362 | 361 | ||
| 362 | /* Block layer tags. */ | ||
| 363 | struct blk_mq_tag_set tag_set; | ||
| 364 | |||
| 363 | /* protects updating the header */ | 365 | /* protects updating the header */ |
| 364 | struct rw_semaphore header_rwsem; | 366 | struct rw_semaphore header_rwsem; |
| 365 | 367 | ||
| @@ -1817,7 +1819,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, | |||
| 1817 | 1819 | ||
| 1818 | /* | 1820 | /* |
| 1819 | * We support a 64-bit length, but ultimately it has to be | 1821 | * We support a 64-bit length, but ultimately it has to be |
| 1820 | * passed to blk_end_request(), which takes an unsigned int. | 1822 | * passed to the block layer, which just supports a 32-bit |
| 1823 | * length field. | ||
| 1821 | */ | 1824 | */ |
| 1822 | obj_request->xferred = osd_req->r_reply_op_len[0]; | 1825 | obj_request->xferred = osd_req->r_reply_op_len[0]; |
| 1823 | rbd_assert(obj_request->xferred < (u64)UINT_MAX); | 1826 | rbd_assert(obj_request->xferred < (u64)UINT_MAX); |
| @@ -2275,7 +2278,10 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) | |||
| 2275 | more = obj_request->which < img_request->obj_request_count - 1; | 2278 | more = obj_request->which < img_request->obj_request_count - 1; |
| 2276 | } else { | 2279 | } else { |
| 2277 | rbd_assert(img_request->rq != NULL); | 2280 | rbd_assert(img_request->rq != NULL); |
| 2278 | more = blk_end_request(img_request->rq, result, xferred); | 2281 | |
| 2282 | more = blk_update_request(img_request->rq, result, xferred); | ||
| 2283 | if (!more) | ||
| 2284 | __blk_mq_end_request(img_request->rq, result); | ||
| 2279 | } | 2285 | } |
| 2280 | 2286 | ||
| 2281 | return more; | 2287 | return more; |
| @@ -3304,8 +3310,10 @@ out: | |||
| 3304 | return ret; | 3310 | return ret; |
| 3305 | } | 3311 | } |
| 3306 | 3312 | ||
| 3307 | static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) | 3313 | static void rbd_queue_workfn(struct work_struct *work) |
| 3308 | { | 3314 | { |
| 3315 | struct request *rq = blk_mq_rq_from_pdu(work); | ||
| 3316 | struct rbd_device *rbd_dev = rq->q->queuedata; | ||
| 3309 | struct rbd_img_request *img_request; | 3317 | struct rbd_img_request *img_request; |
| 3310 | struct ceph_snap_context *snapc = NULL; | 3318 | struct ceph_snap_context *snapc = NULL; |
| 3311 | u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; | 3319 | u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; |
| @@ -3314,6 +3322,13 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) | |||
| 3314 | u64 mapping_size; | 3322 | u64 mapping_size; |
| 3315 | int result; | 3323 | int result; |
| 3316 | 3324 | ||
| 3325 | if (rq->cmd_type != REQ_TYPE_FS) { | ||
| 3326 | dout("%s: non-fs request type %d\n", __func__, | ||
| 3327 | (int) rq->cmd_type); | ||
| 3328 | result = -EIO; | ||
| 3329 | goto err; | ||
| 3330 | } | ||
| 3331 | |||
| 3317 | if (rq->cmd_flags & REQ_DISCARD) | 3332 | if (rq->cmd_flags & REQ_DISCARD) |
| 3318 | op_type = OBJ_OP_DISCARD; | 3333 | op_type = OBJ_OP_DISCARD; |
| 3319 | else if (rq->cmd_flags & REQ_WRITE) | 3334 | else if (rq->cmd_flags & REQ_WRITE) |
| @@ -3359,6 +3374,8 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) | |||
| 3359 | goto err_rq; /* Shouldn't happen */ | 3374 | goto err_rq; /* Shouldn't happen */ |
| 3360 | } | 3375 | } |
| 3361 | 3376 | ||
| 3377 | blk_mq_start_request(rq); | ||
| 3378 | |||
| 3362 | down_read(&rbd_dev->header_rwsem); | 3379 | down_read(&rbd_dev->header_rwsem); |
| 3363 | mapping_size = rbd_dev->mapping.size; | 3380 | mapping_size = rbd_dev->mapping.size; |
| 3364 | if (op_type != OBJ_OP_READ) { | 3381 | if (op_type != OBJ_OP_READ) { |
| @@ -3404,53 +3421,18 @@ err_rq: | |||
| 3404 | rbd_warn(rbd_dev, "%s %llx at %llx result %d", | 3421 | rbd_warn(rbd_dev, "%s %llx at %llx result %d", |
| 3405 | obj_op_name(op_type), length, offset, result); | 3422 | obj_op_name(op_type), length, offset, result); |
| 3406 | ceph_put_snap_context(snapc); | 3423 | ceph_put_snap_context(snapc); |
| 3407 | blk_end_request_all(rq, result); | 3424 | err: |
| 3425 | blk_mq_end_request(rq, result); | ||
| 3408 | } | 3426 | } |
| 3409 | 3427 | ||
| 3410 | static void rbd_request_workfn(struct work_struct *work) | 3428 | static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, |
| 3429 | const struct blk_mq_queue_data *bd) | ||
| 3411 | { | 3430 | { |
| 3412 | struct rbd_device *rbd_dev = | 3431 | struct request *rq = bd->rq; |
| 3413 | container_of(work, struct rbd_device, rq_work); | 3432 | struct work_struct *work = blk_mq_rq_to_pdu(rq); |
| 3414 | struct request *rq, *next; | ||
| 3415 | LIST_HEAD(requests); | ||
| 3416 | |||
| 3417 | spin_lock_irq(&rbd_dev->lock); /* rq->q->queue_lock */ | ||
| 3418 | list_splice_init(&rbd_dev->rq_queue, &requests); | ||
| 3419 | spin_unlock_irq(&rbd_dev->lock); | ||
| 3420 | 3433 | ||
| 3421 | list_for_each_entry_safe(rq, next, &requests, queuelist) { | 3434 | queue_work(rbd_wq, work); |
| 3422 | list_del_init(&rq->queuelist); | 3435 | return BLK_MQ_RQ_QUEUE_OK; |
| 3423 | rbd_handle_request(rbd_dev, rq); | ||
| 3424 | } | ||
| 3425 | } | ||
| 3426 | |||
| 3427 | /* | ||
| 3428 | * Called with q->queue_lock held and interrupts disabled, possibly on | ||
| 3429 | * the way to schedule(). Do not sleep here! | ||
| 3430 | */ | ||
| 3431 | static void rbd_request_fn(struct request_queue *q) | ||
| 3432 | { | ||
| 3433 | struct rbd_device *rbd_dev = q->queuedata; | ||
| 3434 | struct request *rq; | ||
| 3435 | int queued = 0; | ||
| 3436 | |||
| 3437 | rbd_assert(rbd_dev); | ||
| 3438 | |||
| 3439 | while ((rq = blk_fetch_request(q))) { | ||
| 3440 | /* Ignore any non-FS requests that filter through. */ | ||
| 3441 | if (rq->cmd_type != REQ_TYPE_FS) { | ||
| 3442 | dout("%s: non-fs request type %d\n", __func__, | ||
| 3443 | (int) rq->cmd_type); | ||
| 3444 | __blk_end_request_all(rq, 0); | ||
| 3445 | continue; | ||
| 3446 | } | ||
| 3447 | |||
| 3448 | list_add_tail(&rq->queuelist, &rbd_dev->rq_queue); | ||
| 3449 | queued++; | ||
| 3450 | } | ||
| 3451 | |||
| 3452 | if (queued) | ||
| 3453 | queue_work(rbd_wq, &rbd_dev->rq_work); | ||
| 3454 | } | 3436 | } |
| 3455 | 3437 | ||
| 3456 | /* | 3438 | /* |
| @@ -3511,6 +3493,7 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) | |||
| 3511 | del_gendisk(disk); | 3493 | del_gendisk(disk); |
| 3512 | if (disk->queue) | 3494 | if (disk->queue) |
| 3513 | blk_cleanup_queue(disk->queue); | 3495 | blk_cleanup_queue(disk->queue); |
| 3496 | blk_mq_free_tag_set(&rbd_dev->tag_set); | ||
| 3514 | } | 3497 | } |
| 3515 | put_disk(disk); | 3498 | put_disk(disk); |
| 3516 | } | 3499 | } |
| @@ -3694,7 +3677,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) | |||
| 3694 | 3677 | ||
| 3695 | ret = rbd_dev_header_info(rbd_dev); | 3678 | ret = rbd_dev_header_info(rbd_dev); |
| 3696 | if (ret) | 3679 | if (ret) |
| 3697 | return ret; | 3680 | goto out; |
| 3698 | 3681 | ||
| 3699 | /* | 3682 | /* |
| 3700 | * If there is a parent, see if it has disappeared due to the | 3683 | * If there is a parent, see if it has disappeared due to the |
| @@ -3703,30 +3686,46 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) | |||
| 3703 | if (rbd_dev->parent) { | 3686 | if (rbd_dev->parent) { |
| 3704 | ret = rbd_dev_v2_parent_info(rbd_dev); | 3687 | ret = rbd_dev_v2_parent_info(rbd_dev); |
| 3705 | if (ret) | 3688 | if (ret) |
| 3706 | return ret; | 3689 | goto out; |
| 3707 | } | 3690 | } |
| 3708 | 3691 | ||
| 3709 | if (rbd_dev->spec->snap_id == CEPH_NOSNAP) { | 3692 | if (rbd_dev->spec->snap_id == CEPH_NOSNAP) { |
| 3710 | if (rbd_dev->mapping.size != rbd_dev->header.image_size) | 3693 | rbd_dev->mapping.size = rbd_dev->header.image_size; |
| 3711 | rbd_dev->mapping.size = rbd_dev->header.image_size; | ||
| 3712 | } else { | 3694 | } else { |
| 3713 | /* validate mapped snapshot's EXISTS flag */ | 3695 | /* validate mapped snapshot's EXISTS flag */ |
| 3714 | rbd_exists_validate(rbd_dev); | 3696 | rbd_exists_validate(rbd_dev); |
| 3715 | } | 3697 | } |
| 3716 | 3698 | ||
| 3699 | out: | ||
| 3717 | up_write(&rbd_dev->header_rwsem); | 3700 | up_write(&rbd_dev->header_rwsem); |
| 3718 | 3701 | if (!ret && mapping_size != rbd_dev->mapping.size) | |
| 3719 | if (mapping_size != rbd_dev->mapping.size) | ||
| 3720 | rbd_dev_update_size(rbd_dev); | 3702 | rbd_dev_update_size(rbd_dev); |
| 3721 | 3703 | ||
| 3704 | return ret; | ||
| 3705 | } | ||
| 3706 | |||
| 3707 | static int rbd_init_request(void *data, struct request *rq, | ||
| 3708 | unsigned int hctx_idx, unsigned int request_idx, | ||
| 3709 | unsigned int numa_node) | ||
| 3710 | { | ||
| 3711 | struct work_struct *work = blk_mq_rq_to_pdu(rq); | ||
| 3712 | |||
| 3713 | INIT_WORK(work, rbd_queue_workfn); | ||
| 3722 | return 0; | 3714 | return 0; |
| 3723 | } | 3715 | } |
| 3724 | 3716 | ||
| 3717 | static struct blk_mq_ops rbd_mq_ops = { | ||
| 3718 | .queue_rq = rbd_queue_rq, | ||
| 3719 | .map_queue = blk_mq_map_queue, | ||
| 3720 | .init_request = rbd_init_request, | ||
| 3721 | }; | ||
| 3722 | |||
| 3725 | static int rbd_init_disk(struct rbd_device *rbd_dev) | 3723 | static int rbd_init_disk(struct rbd_device *rbd_dev) |
| 3726 | { | 3724 | { |
| 3727 | struct gendisk *disk; | 3725 | struct gendisk *disk; |
| 3728 | struct request_queue *q; | 3726 | struct request_queue *q; |
| 3729 | u64 segment_size; | 3727 | u64 segment_size; |
| 3728 | int err; | ||
| 3730 | 3729 | ||
| 3731 | /* create gendisk info */ | 3730 | /* create gendisk info */ |
| 3732 | disk = alloc_disk(single_major ? | 3731 | disk = alloc_disk(single_major ? |
| @@ -3744,10 +3743,25 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
| 3744 | disk->fops = &rbd_bd_ops; | 3743 | disk->fops = &rbd_bd_ops; |
| 3745 | disk->private_data = rbd_dev; | 3744 | disk->private_data = rbd_dev; |
| 3746 | 3745 | ||
| 3747 | q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); | 3746 | memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); |
| 3748 | if (!q) | 3747 | rbd_dev->tag_set.ops = &rbd_mq_ops; |
| 3748 | rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; | ||
| 3749 | rbd_dev->tag_set.numa_node = NUMA_NO_NODE; | ||
| 3750 | rbd_dev->tag_set.flags = | ||
| 3751 | BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; | ||
| 3752 | rbd_dev->tag_set.nr_hw_queues = 1; | ||
| 3753 | rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); | ||
| 3754 | |||
| 3755 | err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); | ||
| 3756 | if (err) | ||
| 3749 | goto out_disk; | 3757 | goto out_disk; |
| 3750 | 3758 | ||
| 3759 | q = blk_mq_init_queue(&rbd_dev->tag_set); | ||
| 3760 | if (IS_ERR(q)) { | ||
| 3761 | err = PTR_ERR(q); | ||
| 3762 | goto out_tag_set; | ||
| 3763 | } | ||
| 3764 | |||
| 3751 | /* We use the default size, but let's be explicit about it. */ | 3765 | /* We use the default size, but let's be explicit about it. */ |
| 3752 | blk_queue_physical_block_size(q, SECTOR_SIZE); | 3766 | blk_queue_physical_block_size(q, SECTOR_SIZE); |
| 3753 | 3767 | ||
| @@ -3773,10 +3787,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
| 3773 | rbd_dev->disk = disk; | 3787 | rbd_dev->disk = disk; |
| 3774 | 3788 | ||
| 3775 | return 0; | 3789 | return 0; |
| 3790 | out_tag_set: | ||
| 3791 | blk_mq_free_tag_set(&rbd_dev->tag_set); | ||
| 3776 | out_disk: | 3792 | out_disk: |
| 3777 | put_disk(disk); | 3793 | put_disk(disk); |
| 3778 | 3794 | return err; | |
| 3779 | return -ENOMEM; | ||
| 3780 | } | 3795 | } |
| 3781 | 3796 | ||
| 3782 | /* | 3797 | /* |
| @@ -4033,8 +4048,6 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
| 4033 | return NULL; | 4048 | return NULL; |
| 4034 | 4049 | ||
| 4035 | spin_lock_init(&rbd_dev->lock); | 4050 | spin_lock_init(&rbd_dev->lock); |
| 4036 | INIT_LIST_HEAD(&rbd_dev->rq_queue); | ||
| 4037 | INIT_WORK(&rbd_dev->rq_work, rbd_request_workfn); | ||
| 4038 | rbd_dev->flags = 0; | 4051 | rbd_dev->flags = 0; |
| 4039 | atomic_set(&rbd_dev->parent_ref, 0); | 4052 | atomic_set(&rbd_dev->parent_ref, 0); |
| 4040 | INIT_LIST_HEAD(&rbd_dev->node); | 4053 | INIT_LIST_HEAD(&rbd_dev->node); |
| @@ -4274,32 +4287,22 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
| 4274 | } | 4287 | } |
| 4275 | 4288 | ||
| 4276 | /* | 4289 | /* |
| 4277 | * We always update the parent overlap. If it's zero we | 4290 | * We always update the parent overlap. If it's zero we issue |
| 4278 | * treat it specially. | 4291 | * a warning, as we will proceed as if there was no parent. |
| 4279 | */ | 4292 | */ |
| 4280 | rbd_dev->parent_overlap = overlap; | ||
| 4281 | if (!overlap) { | 4293 | if (!overlap) { |
| 4282 | |||
| 4283 | /* A null parent_spec indicates it's the initial probe */ | ||
| 4284 | |||
| 4285 | if (parent_spec) { | 4294 | if (parent_spec) { |
| 4286 | /* | 4295 | /* refresh, careful to warn just once */ |
| 4287 | * The overlap has become zero, so the clone | 4296 | if (rbd_dev->parent_overlap) |
| 4288 | * must have been resized down to 0 at some | 4297 | rbd_warn(rbd_dev, |
| 4289 | * point. Treat this the same as a flatten. | 4298 | "clone now standalone (overlap became 0)"); |
| 4290 | */ | ||
| 4291 | rbd_dev_parent_put(rbd_dev); | ||
| 4292 | pr_info("%s: clone image now standalone\n", | ||
| 4293 | rbd_dev->disk->disk_name); | ||
| 4294 | } else { | 4299 | } else { |
| 4295 | /* | 4300 | /* initial probe */ |
| 4296 | * For the initial probe, if we find the | 4301 | rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); |
| 4297 | * overlap is zero we just pretend there was | ||
| 4298 | * no parent image. | ||
| 4299 | */ | ||
| 4300 | rbd_warn(rbd_dev, "ignoring parent with overlap 0"); | ||
| 4301 | } | 4302 | } |
| 4302 | } | 4303 | } |
| 4304 | rbd_dev->parent_overlap = overlap; | ||
| 4305 | |||
| 4303 | out: | 4306 | out: |
| 4304 | ret = 0; | 4307 | ret = 0; |
| 4305 | out_err: | 4308 | out_err: |
| @@ -4771,36 +4774,6 @@ static inline size_t next_token(const char **buf) | |||
| 4771 | } | 4774 | } |
| 4772 | 4775 | ||
| 4773 | /* | 4776 | /* |
| 4774 | * Finds the next token in *buf, and if the provided token buffer is | ||
| 4775 | * big enough, copies the found token into it. The result, if | ||
| 4776 | * copied, is guaranteed to be terminated with '\0'. Note that *buf | ||
| 4777 | * must be terminated with '\0' on entry. | ||
| 4778 | * | ||
| 4779 | * Returns the length of the token found (not including the '\0'). | ||
| 4780 | * Return value will be 0 if no token is found, and it will be >= | ||
| 4781 | * token_size if the token would not fit. | ||
| 4782 | * | ||
| 4783 | * The *buf pointer will be updated to point beyond the end of the | ||
| 4784 | * found token. Note that this occurs even if the token buffer is | ||
| 4785 | * too small to hold it. | ||
| 4786 | */ | ||
| 4787 | static inline size_t copy_token(const char **buf, | ||
| 4788 | char *token, | ||
| 4789 | size_t token_size) | ||
| 4790 | { | ||
| 4791 | size_t len; | ||
| 4792 | |||
| 4793 | len = next_token(buf); | ||
| 4794 | if (len < token_size) { | ||
| 4795 | memcpy(token, *buf, len); | ||
| 4796 | *(token + len) = '\0'; | ||
| 4797 | } | ||
| 4798 | *buf += len; | ||
| 4799 | |||
| 4800 | return len; | ||
| 4801 | } | ||
| 4802 | |||
| 4803 | /* | ||
| 4804 | * Finds the next token in *buf, dynamically allocates a buffer big | 4777 | * Finds the next token in *buf, dynamically allocates a buffer big |
| 4805 | * enough to hold a copy of it, and copies the token into the new | 4778 | * enough to hold a copy of it, and copies the token into the new |
| 4806 | * buffer. The copy is guaranteed to be terminated with '\0'. Note | 4779 | * buffer. The copy is guaranteed to be terminated with '\0'. Note |
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 5bd853ba44ff..64fa248343f6 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c | |||
| @@ -40,20 +40,6 @@ static inline void ceph_set_cached_acl(struct inode *inode, | |||
| 40 | spin_unlock(&ci->i_ceph_lock); | 40 | spin_unlock(&ci->i_ceph_lock); |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | static inline struct posix_acl *ceph_get_cached_acl(struct inode *inode, | ||
| 44 | int type) | ||
| 45 | { | ||
| 46 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 47 | struct posix_acl *acl = ACL_NOT_CACHED; | ||
| 48 | |||
| 49 | spin_lock(&ci->i_ceph_lock); | ||
| 50 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)) | ||
| 51 | acl = get_cached_acl(inode, type); | ||
| 52 | spin_unlock(&ci->i_ceph_lock); | ||
| 53 | |||
| 54 | return acl; | ||
| 55 | } | ||
| 56 | |||
| 57 | struct posix_acl *ceph_get_acl(struct inode *inode, int type) | 43 | struct posix_acl *ceph_get_acl(struct inode *inode, int type) |
| 58 | { | 44 | { |
| 59 | int size; | 45 | int size; |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 24be059fd1f8..fd5599d32362 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -196,17 +196,22 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
| 196 | u64 len = PAGE_CACHE_SIZE; | 196 | u64 len = PAGE_CACHE_SIZE; |
| 197 | 197 | ||
| 198 | if (off >= i_size_read(inode)) { | 198 | if (off >= i_size_read(inode)) { |
| 199 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | 199 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
| 200 | SetPageUptodate(page); | 200 | SetPageUptodate(page); |
| 201 | return 0; | 201 | return 0; |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | /* | 204 | if (ci->i_inline_version != CEPH_INLINE_NONE) { |
| 205 | * Uptodate inline data should have been added into page cache | 205 | /* |
| 206 | * while getting Fcr caps. | 206 | * Uptodate inline data should have been added |
| 207 | */ | 207 | * into page cache while getting Fcr caps. |
| 208 | if (ci->i_inline_version != CEPH_INLINE_NONE) | 208 | */ |
| 209 | return -EINVAL; | 209 | if (off == 0) |
| 210 | return -EINVAL; | ||
| 211 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | ||
| 212 | SetPageUptodate(page); | ||
| 213 | return 0; | ||
| 214 | } | ||
| 210 | 215 | ||
| 211 | err = ceph_readpage_from_fscache(inode, page); | 216 | err = ceph_readpage_from_fscache(inode, page); |
| 212 | if (err == 0) | 217 | if (err == 0) |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b93c631c6c87..8172775428a0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -577,7 +577,6 @@ void ceph_add_cap(struct inode *inode, | |||
| 577 | struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc, | 577 | struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc, |
| 578 | realmino); | 578 | realmino); |
| 579 | if (realm) { | 579 | if (realm) { |
| 580 | ceph_get_snap_realm(mdsc, realm); | ||
| 581 | spin_lock(&realm->inodes_with_caps_lock); | 580 | spin_lock(&realm->inodes_with_caps_lock); |
| 582 | ci->i_snap_realm = realm; | 581 | ci->i_snap_realm = realm; |
| 583 | list_add(&ci->i_snap_realm_item, | 582 | list_add(&ci->i_snap_realm_item, |
| @@ -1451,8 +1450,8 @@ static int __mark_caps_flushing(struct inode *inode, | |||
| 1451 | spin_lock(&mdsc->cap_dirty_lock); | 1450 | spin_lock(&mdsc->cap_dirty_lock); |
| 1452 | list_del_init(&ci->i_dirty_item); | 1451 | list_del_init(&ci->i_dirty_item); |
| 1453 | 1452 | ||
| 1454 | ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; | ||
| 1455 | if (list_empty(&ci->i_flushing_item)) { | 1453 | if (list_empty(&ci->i_flushing_item)) { |
| 1454 | ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; | ||
| 1456 | list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); | 1455 | list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); |
| 1457 | mdsc->num_cap_flushing++; | 1456 | mdsc->num_cap_flushing++; |
| 1458 | dout(" inode %p now flushing seq %lld\n", inode, | 1457 | dout(" inode %p now flushing seq %lld\n", inode, |
| @@ -2073,17 +2072,16 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) | |||
| 2073 | * requested from the MDS. | 2072 | * requested from the MDS. |
| 2074 | */ | 2073 | */ |
| 2075 | static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | 2074 | static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, |
| 2076 | loff_t endoff, int *got, struct page **pinned_page, | 2075 | loff_t endoff, int *got, int *check_max, int *err) |
| 2077 | int *check_max, int *err) | ||
| 2078 | { | 2076 | { |
| 2079 | struct inode *inode = &ci->vfs_inode; | 2077 | struct inode *inode = &ci->vfs_inode; |
| 2080 | int ret = 0; | 2078 | int ret = 0; |
| 2081 | int have, implemented, _got = 0; | 2079 | int have, implemented; |
| 2082 | int file_wanted; | 2080 | int file_wanted; |
| 2083 | 2081 | ||
| 2084 | dout("get_cap_refs %p need %s want %s\n", inode, | 2082 | dout("get_cap_refs %p need %s want %s\n", inode, |
| 2085 | ceph_cap_string(need), ceph_cap_string(want)); | 2083 | ceph_cap_string(need), ceph_cap_string(want)); |
| 2086 | again: | 2084 | |
| 2087 | spin_lock(&ci->i_ceph_lock); | 2085 | spin_lock(&ci->i_ceph_lock); |
| 2088 | 2086 | ||
| 2089 | /* make sure file is actually open */ | 2087 | /* make sure file is actually open */ |
| @@ -2138,50 +2136,34 @@ again: | |||
| 2138 | inode, ceph_cap_string(have), ceph_cap_string(not), | 2136 | inode, ceph_cap_string(have), ceph_cap_string(not), |
| 2139 | ceph_cap_string(revoking)); | 2137 | ceph_cap_string(revoking)); |
| 2140 | if ((revoking & not) == 0) { | 2138 | if ((revoking & not) == 0) { |
| 2141 | _got = need | (have & want); | 2139 | *got = need | (have & want); |
| 2142 | __take_cap_refs(ci, _got); | 2140 | __take_cap_refs(ci, *got); |
| 2143 | ret = 1; | 2141 | ret = 1; |
| 2144 | } | 2142 | } |
| 2145 | } else { | 2143 | } else { |
| 2144 | int session_readonly = false; | ||
| 2145 | if ((need & CEPH_CAP_FILE_WR) && ci->i_auth_cap) { | ||
| 2146 | struct ceph_mds_session *s = ci->i_auth_cap->session; | ||
| 2147 | spin_lock(&s->s_cap_lock); | ||
| 2148 | session_readonly = s->s_readonly; | ||
| 2149 | spin_unlock(&s->s_cap_lock); | ||
| 2150 | } | ||
| 2151 | if (session_readonly) { | ||
| 2152 | dout("get_cap_refs %p needed %s but mds%d readonly\n", | ||
| 2153 | inode, ceph_cap_string(need), ci->i_auth_cap->mds); | ||
| 2154 | *err = -EROFS; | ||
| 2155 | ret = 1; | ||
| 2156 | goto out_unlock; | ||
| 2157 | } | ||
| 2158 | |||
| 2146 | dout("get_cap_refs %p have %s needed %s\n", inode, | 2159 | dout("get_cap_refs %p have %s needed %s\n", inode, |
| 2147 | ceph_cap_string(have), ceph_cap_string(need)); | 2160 | ceph_cap_string(have), ceph_cap_string(need)); |
| 2148 | } | 2161 | } |
| 2149 | out_unlock: | 2162 | out_unlock: |
| 2150 | spin_unlock(&ci->i_ceph_lock); | 2163 | spin_unlock(&ci->i_ceph_lock); |
| 2151 | 2164 | ||
| 2152 | if (ci->i_inline_version != CEPH_INLINE_NONE && | ||
| 2153 | (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && | ||
| 2154 | i_size_read(inode) > 0) { | ||
| 2155 | int ret1; | ||
| 2156 | struct page *page = find_get_page(inode->i_mapping, 0); | ||
| 2157 | if (page) { | ||
| 2158 | if (PageUptodate(page)) { | ||
| 2159 | *pinned_page = page; | ||
| 2160 | goto out; | ||
| 2161 | } | ||
| 2162 | page_cache_release(page); | ||
| 2163 | } | ||
| 2164 | /* | ||
| 2165 | * drop cap refs first because getattr while holding | ||
| 2166 | * caps refs can cause deadlock. | ||
| 2167 | */ | ||
| 2168 | ceph_put_cap_refs(ci, _got); | ||
| 2169 | _got = 0; | ||
| 2170 | |||
| 2171 | /* getattr request will bring inline data into page cache */ | ||
| 2172 | ret1 = __ceph_do_getattr(inode, NULL, | ||
| 2173 | CEPH_STAT_CAP_INLINE_DATA, true); | ||
| 2174 | if (ret1 >= 0) { | ||
| 2175 | ret = 0; | ||
| 2176 | goto again; | ||
| 2177 | } | ||
| 2178 | *err = ret1; | ||
| 2179 | ret = 1; | ||
| 2180 | } | ||
| 2181 | out: | ||
| 2182 | dout("get_cap_refs %p ret %d got %s\n", inode, | 2165 | dout("get_cap_refs %p ret %d got %s\n", inode, |
| 2183 | ret, ceph_cap_string(_got)); | 2166 | ret, ceph_cap_string(*got)); |
| 2184 | *got = _got; | ||
| 2185 | return ret; | 2167 | return ret; |
| 2186 | } | 2168 | } |
| 2187 | 2169 | ||
| @@ -2221,22 +2203,52 @@ static void check_max_size(struct inode *inode, loff_t endoff) | |||
| 2221 | int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | 2203 | int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, |
| 2222 | loff_t endoff, int *got, struct page **pinned_page) | 2204 | loff_t endoff, int *got, struct page **pinned_page) |
| 2223 | { | 2205 | { |
| 2224 | int check_max, ret, err; | 2206 | int _got, check_max, ret, err = 0; |
| 2225 | 2207 | ||
| 2226 | retry: | 2208 | retry: |
| 2227 | if (endoff > 0) | 2209 | if (endoff > 0) |
| 2228 | check_max_size(&ci->vfs_inode, endoff); | 2210 | check_max_size(&ci->vfs_inode, endoff); |
| 2211 | _got = 0; | ||
| 2229 | check_max = 0; | 2212 | check_max = 0; |
| 2230 | err = 0; | ||
| 2231 | ret = wait_event_interruptible(ci->i_cap_wq, | 2213 | ret = wait_event_interruptible(ci->i_cap_wq, |
| 2232 | try_get_cap_refs(ci, need, want, endoff, | 2214 | try_get_cap_refs(ci, need, want, endoff, |
| 2233 | got, pinned_page, | 2215 | &_got, &check_max, &err)); |
| 2234 | &check_max, &err)); | ||
| 2235 | if (err) | 2216 | if (err) |
| 2236 | ret = err; | 2217 | ret = err; |
| 2218 | if (ret < 0) | ||
| 2219 | return ret; | ||
| 2220 | |||
| 2237 | if (check_max) | 2221 | if (check_max) |
| 2238 | goto retry; | 2222 | goto retry; |
| 2239 | return ret; | 2223 | |
| 2224 | if (ci->i_inline_version != CEPH_INLINE_NONE && | ||
| 2225 | (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && | ||
| 2226 | i_size_read(&ci->vfs_inode) > 0) { | ||
| 2227 | struct page *page = find_get_page(ci->vfs_inode.i_mapping, 0); | ||
| 2228 | if (page) { | ||
| 2229 | if (PageUptodate(page)) { | ||
| 2230 | *pinned_page = page; | ||
| 2231 | goto out; | ||
| 2232 | } | ||
| 2233 | page_cache_release(page); | ||
| 2234 | } | ||
| 2235 | /* | ||
| 2236 | * drop cap refs first because getattr while holding | ||
| 2237 | * caps refs can cause deadlock. | ||
| 2238 | */ | ||
| 2239 | ceph_put_cap_refs(ci, _got); | ||
| 2240 | _got = 0; | ||
| 2241 | |||
| 2242 | /* getattr request will bring inline data into page cache */ | ||
| 2243 | ret = __ceph_do_getattr(&ci->vfs_inode, NULL, | ||
| 2244 | CEPH_STAT_CAP_INLINE_DATA, true); | ||
| 2245 | if (ret < 0) | ||
| 2246 | return ret; | ||
| 2247 | goto retry; | ||
| 2248 | } | ||
| 2249 | out: | ||
| 2250 | *got = _got; | ||
| 2251 | return 0; | ||
| 2240 | } | 2252 | } |
| 2241 | 2253 | ||
| 2242 | /* | 2254 | /* |
| @@ -2432,13 +2444,13 @@ static void invalidate_aliases(struct inode *inode) | |||
| 2432 | */ | 2444 | */ |
| 2433 | static void handle_cap_grant(struct ceph_mds_client *mdsc, | 2445 | static void handle_cap_grant(struct ceph_mds_client *mdsc, |
| 2434 | struct inode *inode, struct ceph_mds_caps *grant, | 2446 | struct inode *inode, struct ceph_mds_caps *grant, |
| 2435 | void *snaptrace, int snaptrace_len, | ||
| 2436 | u64 inline_version, | 2447 | u64 inline_version, |
| 2437 | void *inline_data, int inline_len, | 2448 | void *inline_data, int inline_len, |
| 2438 | struct ceph_buffer *xattr_buf, | 2449 | struct ceph_buffer *xattr_buf, |
| 2439 | struct ceph_mds_session *session, | 2450 | struct ceph_mds_session *session, |
| 2440 | struct ceph_cap *cap, int issued) | 2451 | struct ceph_cap *cap, int issued) |
| 2441 | __releases(ci->i_ceph_lock) | 2452 | __releases(ci->i_ceph_lock) |
| 2453 | __releases(mdsc->snap_rwsem) | ||
| 2442 | { | 2454 | { |
| 2443 | struct ceph_inode_info *ci = ceph_inode(inode); | 2455 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2444 | int mds = session->s_mds; | 2456 | int mds = session->s_mds; |
| @@ -2639,10 +2651,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, | |||
| 2639 | spin_unlock(&ci->i_ceph_lock); | 2651 | spin_unlock(&ci->i_ceph_lock); |
| 2640 | 2652 | ||
| 2641 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { | 2653 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { |
| 2642 | down_write(&mdsc->snap_rwsem); | ||
| 2643 | ceph_update_snap_trace(mdsc, snaptrace, | ||
| 2644 | snaptrace + snaptrace_len, false); | ||
| 2645 | downgrade_write(&mdsc->snap_rwsem); | ||
| 2646 | kick_flushing_inode_caps(mdsc, session, inode); | 2654 | kick_flushing_inode_caps(mdsc, session, inode); |
| 2647 | up_read(&mdsc->snap_rwsem); | 2655 | up_read(&mdsc->snap_rwsem); |
| 2648 | if (newcaps & ~issued) | 2656 | if (newcaps & ~issued) |
| @@ -3052,6 +3060,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 3052 | struct ceph_cap *cap; | 3060 | struct ceph_cap *cap; |
| 3053 | struct ceph_mds_caps *h; | 3061 | struct ceph_mds_caps *h; |
| 3054 | struct ceph_mds_cap_peer *peer = NULL; | 3062 | struct ceph_mds_cap_peer *peer = NULL; |
| 3063 | struct ceph_snap_realm *realm; | ||
| 3055 | int mds = session->s_mds; | 3064 | int mds = session->s_mds; |
| 3056 | int op, issued; | 3065 | int op, issued; |
| 3057 | u32 seq, mseq; | 3066 | u32 seq, mseq; |
| @@ -3153,11 +3162,23 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 3153 | goto done_unlocked; | 3162 | goto done_unlocked; |
| 3154 | 3163 | ||
| 3155 | case CEPH_CAP_OP_IMPORT: | 3164 | case CEPH_CAP_OP_IMPORT: |
| 3165 | realm = NULL; | ||
| 3166 | if (snaptrace_len) { | ||
| 3167 | down_write(&mdsc->snap_rwsem); | ||
| 3168 | ceph_update_snap_trace(mdsc, snaptrace, | ||
| 3169 | snaptrace + snaptrace_len, | ||
| 3170 | false, &realm); | ||
| 3171 | downgrade_write(&mdsc->snap_rwsem); | ||
| 3172 | } else { | ||
| 3173 | down_read(&mdsc->snap_rwsem); | ||
| 3174 | } | ||
| 3156 | handle_cap_import(mdsc, inode, h, peer, session, | 3175 | handle_cap_import(mdsc, inode, h, peer, session, |
| 3157 | &cap, &issued); | 3176 | &cap, &issued); |
| 3158 | handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, | 3177 | handle_cap_grant(mdsc, inode, h, |
| 3159 | inline_version, inline_data, inline_len, | 3178 | inline_version, inline_data, inline_len, |
| 3160 | msg->middle, session, cap, issued); | 3179 | msg->middle, session, cap, issued); |
| 3180 | if (realm) | ||
| 3181 | ceph_put_snap_realm(mdsc, realm); | ||
| 3161 | goto done_unlocked; | 3182 | goto done_unlocked; |
| 3162 | } | 3183 | } |
| 3163 | 3184 | ||
| @@ -3177,7 +3198,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 3177 | case CEPH_CAP_OP_GRANT: | 3198 | case CEPH_CAP_OP_GRANT: |
| 3178 | __ceph_caps_issued(ci, &issued); | 3199 | __ceph_caps_issued(ci, &issued); |
| 3179 | issued |= __ceph_caps_dirty(ci); | 3200 | issued |= __ceph_caps_dirty(ci); |
| 3180 | handle_cap_grant(mdsc, inode, h, NULL, 0, | 3201 | handle_cap_grant(mdsc, inode, h, |
| 3181 | inline_version, inline_data, inline_len, | 3202 | inline_version, inline_data, inline_len, |
| 3182 | msg->middle, session, cap, issued); | 3203 | msg->middle, session, cap, issued); |
| 3183 | goto done_unlocked; | 3204 | goto done_unlocked; |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c241603764fd..0411dbb15815 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -26,8 +26,6 @@ | |||
| 26 | * point by name. | 26 | * point by name. |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | const struct inode_operations ceph_dir_iops; | ||
| 30 | const struct file_operations ceph_dir_fops; | ||
| 31 | const struct dentry_operations ceph_dentry_ops; | 29 | const struct dentry_operations ceph_dentry_ops; |
| 32 | 30 | ||
| 33 | /* | 31 | /* |
| @@ -672,13 +670,17 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) | |||
| 672 | /* | 670 | /* |
| 673 | * We created the item, then did a lookup, and found | 671 | * We created the item, then did a lookup, and found |
| 674 | * it was already linked to another inode we already | 672 | * it was already linked to another inode we already |
| 675 | * had in our cache (and thus got spliced). Link our | 673 | * had in our cache (and thus got spliced). To not |
| 676 | * dentry to that inode, but don't hash it, just in | 674 | * confuse VFS (especially when inode is a directory), |
| 677 | * case the VFS wants to dereference it. | 675 | * we don't link our dentry to that inode, return an |
| 676 | * error instead. | ||
| 677 | * | ||
| 678 | * This event should be rare and it happens only when | ||
| 679 | * we talk to old MDS. Recent MDS does not send traceless | ||
| 680 | * reply for request that creates new inode. | ||
| 678 | */ | 681 | */ |
| 679 | BUG_ON(!result->d_inode); | 682 | d_drop(result); |
| 680 | d_instantiate(dentry, result->d_inode); | 683 | return -ESTALE; |
| 681 | return 0; | ||
| 682 | } | 684 | } |
| 683 | return PTR_ERR(result); | 685 | return PTR_ERR(result); |
| 684 | } | 686 | } |
| @@ -1335,6 +1337,13 @@ const struct file_operations ceph_dir_fops = { | |||
| 1335 | .fsync = ceph_dir_fsync, | 1337 | .fsync = ceph_dir_fsync, |
| 1336 | }; | 1338 | }; |
| 1337 | 1339 | ||
| 1340 | const struct file_operations ceph_snapdir_fops = { | ||
| 1341 | .iterate = ceph_readdir, | ||
| 1342 | .llseek = ceph_dir_llseek, | ||
| 1343 | .open = ceph_open, | ||
| 1344 | .release = ceph_release, | ||
| 1345 | }; | ||
| 1346 | |||
| 1338 | const struct inode_operations ceph_dir_iops = { | 1347 | const struct inode_operations ceph_dir_iops = { |
| 1339 | .lookup = ceph_lookup, | 1348 | .lookup = ceph_lookup, |
| 1340 | .permission = ceph_permission, | 1349 | .permission = ceph_permission, |
| @@ -1357,6 +1366,14 @@ const struct inode_operations ceph_dir_iops = { | |||
| 1357 | .atomic_open = ceph_atomic_open, | 1366 | .atomic_open = ceph_atomic_open, |
| 1358 | }; | 1367 | }; |
| 1359 | 1368 | ||
| 1369 | const struct inode_operations ceph_snapdir_iops = { | ||
| 1370 | .lookup = ceph_lookup, | ||
| 1371 | .permission = ceph_permission, | ||
| 1372 | .getattr = ceph_getattr, | ||
| 1373 | .mkdir = ceph_mkdir, | ||
| 1374 | .rmdir = ceph_unlink, | ||
| 1375 | }; | ||
| 1376 | |||
| 1360 | const struct dentry_operations ceph_dentry_ops = { | 1377 | const struct dentry_operations ceph_dentry_ops = { |
| 1361 | .d_revalidate = ceph_d_revalidate, | 1378 | .d_revalidate = ceph_d_revalidate, |
| 1362 | .d_release = ceph_d_release, | 1379 | .d_release = ceph_d_release, |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 905986dd4c3c..a3d774b35149 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -275,10 +275,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 275 | err = ceph_mdsc_do_request(mdsc, | 275 | err = ceph_mdsc_do_request(mdsc, |
| 276 | (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, | 276 | (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, |
| 277 | req); | 277 | req); |
| 278 | err = ceph_handle_snapdir(req, dentry, err); | ||
| 278 | if (err) | 279 | if (err) |
| 279 | goto out_req; | 280 | goto out_req; |
| 280 | 281 | ||
| 281 | err = ceph_handle_snapdir(req, dentry, err); | ||
| 282 | if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) | 282 | if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) |
| 283 | err = ceph_handle_notrace_create(dir, dentry); | 283 | err = ceph_handle_notrace_create(dir, dentry); |
| 284 | 284 | ||
| @@ -392,13 +392,14 @@ more: | |||
| 392 | if (ret >= 0) { | 392 | if (ret >= 0) { |
| 393 | int didpages; | 393 | int didpages; |
| 394 | if (was_short && (pos + ret < inode->i_size)) { | 394 | if (was_short && (pos + ret < inode->i_size)) { |
| 395 | u64 tmp = min(this_len - ret, | 395 | int zlen = min(this_len - ret, |
| 396 | inode->i_size - pos - ret); | 396 | inode->i_size - pos - ret); |
| 397 | int zoff = (o_direct ? buf_align : io_align) + | ||
| 398 | read + ret; | ||
| 397 | dout(" zero gap %llu to %llu\n", | 399 | dout(" zero gap %llu to %llu\n", |
| 398 | pos + ret, pos + ret + tmp); | 400 | pos + ret, pos + ret + zlen); |
| 399 | ceph_zero_page_vector_range(page_align + read + ret, | 401 | ceph_zero_page_vector_range(zoff, zlen, pages); |
| 400 | tmp, pages); | 402 | ret += zlen; |
| 401 | ret += tmp; | ||
| 402 | } | 403 | } |
| 403 | 404 | ||
| 404 | didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; | 405 | didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; |
| @@ -878,28 +879,34 @@ again: | |||
| 878 | 879 | ||
| 879 | i_size = i_size_read(inode); | 880 | i_size = i_size_read(inode); |
| 880 | if (retry_op == READ_INLINE) { | 881 | if (retry_op == READ_INLINE) { |
| 881 | /* does not support inline data > PAGE_SIZE */ | 882 | BUG_ON(ret > 0 || read > 0); |
| 882 | if (i_size > PAGE_CACHE_SIZE) { | 883 | if (iocb->ki_pos < i_size && |
| 883 | ret = -EIO; | 884 | iocb->ki_pos < PAGE_CACHE_SIZE) { |
| 884 | } else if (iocb->ki_pos < i_size) { | ||
| 885 | loff_t end = min_t(loff_t, i_size, | 885 | loff_t end = min_t(loff_t, i_size, |
| 886 | iocb->ki_pos + len); | 886 | iocb->ki_pos + len); |
| 887 | end = min_t(loff_t, end, PAGE_CACHE_SIZE); | ||
| 887 | if (statret < end) | 888 | if (statret < end) |
| 888 | zero_user_segment(page, statret, end); | 889 | zero_user_segment(page, statret, end); |
| 889 | ret = copy_page_to_iter(page, | 890 | ret = copy_page_to_iter(page, |
| 890 | iocb->ki_pos & ~PAGE_MASK, | 891 | iocb->ki_pos & ~PAGE_MASK, |
| 891 | end - iocb->ki_pos, to); | 892 | end - iocb->ki_pos, to); |
| 892 | iocb->ki_pos += ret; | 893 | iocb->ki_pos += ret; |
| 893 | } else { | 894 | read += ret; |
| 894 | ret = 0; | 895 | } |
| 896 | if (iocb->ki_pos < i_size && read < len) { | ||
| 897 | size_t zlen = min_t(size_t, len - read, | ||
| 898 | i_size - iocb->ki_pos); | ||
| 899 | ret = iov_iter_zero(zlen, to); | ||
| 900 | iocb->ki_pos += ret; | ||
| 901 | read += ret; | ||
| 895 | } | 902 | } |
| 896 | __free_pages(page, 0); | 903 | __free_pages(page, 0); |
| 897 | return ret; | 904 | return read; |
| 898 | } | 905 | } |
| 899 | 906 | ||
| 900 | /* hit EOF or hole? */ | 907 | /* hit EOF or hole? */ |
| 901 | if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && | 908 | if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && |
| 902 | ret < len) { | 909 | ret < len) { |
| 903 | dout("sync_read hit hole, ppos %lld < size %lld" | 910 | dout("sync_read hit hole, ppos %lld < size %lld" |
| 904 | ", reading more\n", iocb->ki_pos, | 911 | ", reading more\n", iocb->ki_pos, |
| 905 | inode->i_size); | 912 | inode->i_size); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 6b5173605154..119c43c80638 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -82,8 +82,8 @@ struct inode *ceph_get_snapdir(struct inode *parent) | |||
| 82 | inode->i_mode = parent->i_mode; | 82 | inode->i_mode = parent->i_mode; |
| 83 | inode->i_uid = parent->i_uid; | 83 | inode->i_uid = parent->i_uid; |
| 84 | inode->i_gid = parent->i_gid; | 84 | inode->i_gid = parent->i_gid; |
| 85 | inode->i_op = &ceph_dir_iops; | 85 | inode->i_op = &ceph_snapdir_iops; |
| 86 | inode->i_fop = &ceph_dir_fops; | 86 | inode->i_fop = &ceph_snapdir_fops; |
| 87 | ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ | 87 | ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ |
| 88 | ci->i_rbytes = 0; | 88 | ci->i_rbytes = 0; |
| 89 | return inode; | 89 | return inode; |
| @@ -838,30 +838,31 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
| 838 | ceph_vinop(inode), inode->i_mode); | 838 | ceph_vinop(inode), inode->i_mode); |
| 839 | } | 839 | } |
| 840 | 840 | ||
| 841 | /* set dir completion flag? */ | ||
| 842 | if (S_ISDIR(inode->i_mode) && | ||
| 843 | ci->i_files == 0 && ci->i_subdirs == 0 && | ||
| 844 | ceph_snap(inode) == CEPH_NOSNAP && | ||
| 845 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && | ||
| 846 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
| 847 | !__ceph_dir_is_complete(ci)) { | ||
| 848 | dout(" marking %p complete (empty)\n", inode); | ||
| 849 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count), | ||
| 850 | ci->i_ordered_count); | ||
| 851 | } | ||
| 852 | |||
| 853 | /* were we issued a capability? */ | 841 | /* were we issued a capability? */ |
| 854 | if (info->cap.caps) { | 842 | if (info->cap.caps) { |
| 855 | if (ceph_snap(inode) == CEPH_NOSNAP) { | 843 | if (ceph_snap(inode) == CEPH_NOSNAP) { |
| 844 | unsigned caps = le32_to_cpu(info->cap.caps); | ||
| 856 | ceph_add_cap(inode, session, | 845 | ceph_add_cap(inode, session, |
| 857 | le64_to_cpu(info->cap.cap_id), | 846 | le64_to_cpu(info->cap.cap_id), |
| 858 | cap_fmode, | 847 | cap_fmode, caps, |
| 859 | le32_to_cpu(info->cap.caps), | ||
| 860 | le32_to_cpu(info->cap.wanted), | 848 | le32_to_cpu(info->cap.wanted), |
| 861 | le32_to_cpu(info->cap.seq), | 849 | le32_to_cpu(info->cap.seq), |
| 862 | le32_to_cpu(info->cap.mseq), | 850 | le32_to_cpu(info->cap.mseq), |
| 863 | le64_to_cpu(info->cap.realm), | 851 | le64_to_cpu(info->cap.realm), |
| 864 | info->cap.flags, &new_cap); | 852 | info->cap.flags, &new_cap); |
| 853 | |||
| 854 | /* set dir completion flag? */ | ||
| 855 | if (S_ISDIR(inode->i_mode) && | ||
| 856 | ci->i_files == 0 && ci->i_subdirs == 0 && | ||
| 857 | (caps & CEPH_CAP_FILE_SHARED) && | ||
| 858 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
| 859 | !__ceph_dir_is_complete(ci)) { | ||
| 860 | dout(" marking %p complete (empty)\n", inode); | ||
| 861 | __ceph_dir_set_complete(ci, | ||
| 862 | atomic_read(&ci->i_release_count), | ||
| 863 | ci->i_ordered_count); | ||
| 864 | } | ||
| 865 | |||
| 865 | wake = true; | 866 | wake = true; |
| 866 | } else { | 867 | } else { |
| 867 | dout(" %p got snap_caps %s\n", inode, | 868 | dout(" %p got snap_caps %s\n", inode, |
| @@ -1446,12 +1447,14 @@ retry_lookup: | |||
| 1446 | } | 1447 | } |
| 1447 | 1448 | ||
| 1448 | if (!dn->d_inode) { | 1449 | if (!dn->d_inode) { |
| 1449 | dn = splice_dentry(dn, in, NULL); | 1450 | struct dentry *realdn = splice_dentry(dn, in, NULL); |
| 1450 | if (IS_ERR(dn)) { | 1451 | if (IS_ERR(realdn)) { |
| 1451 | err = PTR_ERR(dn); | 1452 | err = PTR_ERR(realdn); |
| 1453 | d_drop(dn); | ||
| 1452 | dn = NULL; | 1454 | dn = NULL; |
| 1453 | goto next_item; | 1455 | goto next_item; |
| 1454 | } | 1456 | } |
| 1457 | dn = realdn; | ||
| 1455 | } | 1458 | } |
| 1456 | 1459 | ||
| 1457 | di = dn->d_fsdata; | 1460 | di = dn->d_fsdata; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5f62fb7a5d0a..71c073f38e54 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -480,6 +480,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
| 480 | mdsc->max_sessions = newmax; | 480 | mdsc->max_sessions = newmax; |
| 481 | } | 481 | } |
| 482 | mdsc->sessions[mds] = s; | 482 | mdsc->sessions[mds] = s; |
| 483 | atomic_inc(&mdsc->num_sessions); | ||
| 483 | atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */ | 484 | atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */ |
| 484 | 485 | ||
| 485 | ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, | 486 | ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, |
| @@ -503,6 +504,7 @@ static void __unregister_session(struct ceph_mds_client *mdsc, | |||
| 503 | mdsc->sessions[s->s_mds] = NULL; | 504 | mdsc->sessions[s->s_mds] = NULL; |
| 504 | ceph_con_close(&s->s_con); | 505 | ceph_con_close(&s->s_con); |
| 505 | ceph_put_mds_session(s); | 506 | ceph_put_mds_session(s); |
| 507 | atomic_dec(&mdsc->num_sessions); | ||
| 506 | } | 508 | } |
| 507 | 509 | ||
| 508 | /* | 510 | /* |
| @@ -842,8 +844,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 | |||
| 842 | struct ceph_options *opt = mdsc->fsc->client->options; | 844 | struct ceph_options *opt = mdsc->fsc->client->options; |
| 843 | void *p; | 845 | void *p; |
| 844 | 846 | ||
| 845 | const char* metadata[3][2] = { | 847 | const char* metadata[][2] = { |
| 846 | {"hostname", utsname()->nodename}, | 848 | {"hostname", utsname()->nodename}, |
| 849 | {"kernel_version", utsname()->release}, | ||
| 847 | {"entity_id", opt->name ? opt->name : ""}, | 850 | {"entity_id", opt->name ? opt->name : ""}, |
| 848 | {NULL, NULL} | 851 | {NULL, NULL} |
| 849 | }; | 852 | }; |
| @@ -1464,19 +1467,33 @@ out_unlocked: | |||
| 1464 | return err; | 1467 | return err; |
| 1465 | } | 1468 | } |
| 1466 | 1469 | ||
| 1470 | static int check_cap_flush(struct inode *inode, u64 want_flush_seq) | ||
| 1471 | { | ||
| 1472 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 1473 | int ret; | ||
| 1474 | spin_lock(&ci->i_ceph_lock); | ||
| 1475 | if (ci->i_flushing_caps) | ||
| 1476 | ret = ci->i_cap_flush_seq >= want_flush_seq; | ||
| 1477 | else | ||
| 1478 | ret = 1; | ||
| 1479 | spin_unlock(&ci->i_ceph_lock); | ||
| 1480 | return ret; | ||
| 1481 | } | ||
| 1482 | |||
| 1467 | /* | 1483 | /* |
| 1468 | * flush all dirty inode data to disk. | 1484 | * flush all dirty inode data to disk. |
| 1469 | * | 1485 | * |
| 1470 | * returns true if we've flushed through want_flush_seq | 1486 | * returns true if we've flushed through want_flush_seq |
| 1471 | */ | 1487 | */ |
| 1472 | static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) | 1488 | static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) |
| 1473 | { | 1489 | { |
| 1474 | int mds, ret = 1; | 1490 | int mds; |
| 1475 | 1491 | ||
| 1476 | dout("check_cap_flush want %lld\n", want_flush_seq); | 1492 | dout("check_cap_flush want %lld\n", want_flush_seq); |
| 1477 | mutex_lock(&mdsc->mutex); | 1493 | mutex_lock(&mdsc->mutex); |
| 1478 | for (mds = 0; ret && mds < mdsc->max_sessions; mds++) { | 1494 | for (mds = 0; mds < mdsc->max_sessions; mds++) { |
| 1479 | struct ceph_mds_session *session = mdsc->sessions[mds]; | 1495 | struct ceph_mds_session *session = mdsc->sessions[mds]; |
| 1496 | struct inode *inode = NULL; | ||
| 1480 | 1497 | ||
| 1481 | if (!session) | 1498 | if (!session) |
| 1482 | continue; | 1499 | continue; |
| @@ -1489,29 +1506,29 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) | |||
| 1489 | list_entry(session->s_cap_flushing.next, | 1506 | list_entry(session->s_cap_flushing.next, |
| 1490 | struct ceph_inode_info, | 1507 | struct ceph_inode_info, |
| 1491 | i_flushing_item); | 1508 | i_flushing_item); |
| 1492 | struct inode *inode = &ci->vfs_inode; | ||
| 1493 | 1509 | ||
| 1494 | spin_lock(&ci->i_ceph_lock); | 1510 | if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { |
| 1495 | if (ci->i_cap_flush_seq <= want_flush_seq) { | ||
| 1496 | dout("check_cap_flush still flushing %p " | 1511 | dout("check_cap_flush still flushing %p " |
| 1497 | "seq %lld <= %lld to mds%d\n", inode, | 1512 | "seq %lld <= %lld to mds%d\n", |
| 1498 | ci->i_cap_flush_seq, want_flush_seq, | 1513 | &ci->vfs_inode, ci->i_cap_flush_seq, |
| 1499 | session->s_mds); | 1514 | want_flush_seq, session->s_mds); |
| 1500 | ret = 0; | 1515 | inode = igrab(&ci->vfs_inode); |
| 1501 | } | 1516 | } |
| 1502 | spin_unlock(&ci->i_ceph_lock); | ||
| 1503 | } | 1517 | } |
| 1504 | mutex_unlock(&session->s_mutex); | 1518 | mutex_unlock(&session->s_mutex); |
| 1505 | ceph_put_mds_session(session); | 1519 | ceph_put_mds_session(session); |
| 1506 | 1520 | ||
| 1507 | if (!ret) | 1521 | if (inode) { |
| 1508 | return ret; | 1522 | wait_event(mdsc->cap_flushing_wq, |
| 1523 | check_cap_flush(inode, want_flush_seq)); | ||
| 1524 | iput(inode); | ||
| 1525 | } | ||
| 1526 | |||
| 1509 | mutex_lock(&mdsc->mutex); | 1527 | mutex_lock(&mdsc->mutex); |
| 1510 | } | 1528 | } |
| 1511 | 1529 | ||
| 1512 | mutex_unlock(&mdsc->mutex); | 1530 | mutex_unlock(&mdsc->mutex); |
| 1513 | dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq); | 1531 | dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq); |
| 1514 | return ret; | ||
| 1515 | } | 1532 | } |
| 1516 | 1533 | ||
| 1517 | /* | 1534 | /* |
| @@ -1923,7 +1940,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1923 | head->num_releases = cpu_to_le16(releases); | 1940 | head->num_releases = cpu_to_le16(releases); |
| 1924 | 1941 | ||
| 1925 | /* time stamp */ | 1942 | /* time stamp */ |
| 1926 | ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); | 1943 | { |
| 1944 | struct ceph_timespec ts; | ||
| 1945 | ceph_encode_timespec(&ts, &req->r_stamp); | ||
| 1946 | ceph_encode_copy(&p, &ts, sizeof(ts)); | ||
| 1947 | } | ||
| 1927 | 1948 | ||
| 1928 | BUG_ON(p > end); | 1949 | BUG_ON(p > end); |
| 1929 | msg->front.iov_len = p - msg->front.iov_base; | 1950 | msg->front.iov_len = p - msg->front.iov_base; |
| @@ -2012,7 +2033,11 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
| 2012 | 2033 | ||
| 2013 | /* time stamp */ | 2034 | /* time stamp */ |
| 2014 | p = msg->front.iov_base + req->r_request_release_offset; | 2035 | p = msg->front.iov_base + req->r_request_release_offset; |
| 2015 | ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); | 2036 | { |
| 2037 | struct ceph_timespec ts; | ||
| 2038 | ceph_encode_timespec(&ts, &req->r_stamp); | ||
| 2039 | ceph_encode_copy(&p, &ts, sizeof(ts)); | ||
| 2040 | } | ||
| 2016 | 2041 | ||
| 2017 | msg->front.iov_len = p - msg->front.iov_base; | 2042 | msg->front.iov_len = p - msg->front.iov_base; |
| 2018 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 2043 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
| @@ -2159,6 +2184,8 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) | |||
| 2159 | p = rb_next(p); | 2184 | p = rb_next(p); |
| 2160 | if (req->r_got_unsafe) | 2185 | if (req->r_got_unsafe) |
| 2161 | continue; | 2186 | continue; |
| 2187 | if (req->r_attempts > 0) | ||
| 2188 | continue; /* only new requests */ | ||
| 2162 | if (req->r_session && | 2189 | if (req->r_session && |
| 2163 | req->r_session->s_mds == mds) { | 2190 | req->r_session->s_mds == mds) { |
| 2164 | dout(" kicking tid %llu\n", req->r_tid); | 2191 | dout(" kicking tid %llu\n", req->r_tid); |
| @@ -2286,6 +2313,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2286 | struct ceph_mds_request *req; | 2313 | struct ceph_mds_request *req; |
| 2287 | struct ceph_mds_reply_head *head = msg->front.iov_base; | 2314 | struct ceph_mds_reply_head *head = msg->front.iov_base; |
| 2288 | struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */ | 2315 | struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */ |
| 2316 | struct ceph_snap_realm *realm; | ||
| 2289 | u64 tid; | 2317 | u64 tid; |
| 2290 | int err, result; | 2318 | int err, result; |
| 2291 | int mds = session->s_mds; | 2319 | int mds = session->s_mds; |
| @@ -2401,11 +2429,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2401 | } | 2429 | } |
| 2402 | 2430 | ||
| 2403 | /* snap trace */ | 2431 | /* snap trace */ |
| 2432 | realm = NULL; | ||
| 2404 | if (rinfo->snapblob_len) { | 2433 | if (rinfo->snapblob_len) { |
| 2405 | down_write(&mdsc->snap_rwsem); | 2434 | down_write(&mdsc->snap_rwsem); |
| 2406 | ceph_update_snap_trace(mdsc, rinfo->snapblob, | 2435 | ceph_update_snap_trace(mdsc, rinfo->snapblob, |
| 2407 | rinfo->snapblob + rinfo->snapblob_len, | 2436 | rinfo->snapblob + rinfo->snapblob_len, |
| 2408 | le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP); | 2437 | le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP, |
| 2438 | &realm); | ||
| 2409 | downgrade_write(&mdsc->snap_rwsem); | 2439 | downgrade_write(&mdsc->snap_rwsem); |
| 2410 | } else { | 2440 | } else { |
| 2411 | down_read(&mdsc->snap_rwsem); | 2441 | down_read(&mdsc->snap_rwsem); |
| @@ -2423,6 +2453,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2423 | mutex_unlock(&req->r_fill_mutex); | 2453 | mutex_unlock(&req->r_fill_mutex); |
| 2424 | 2454 | ||
| 2425 | up_read(&mdsc->snap_rwsem); | 2455 | up_read(&mdsc->snap_rwsem); |
| 2456 | if (realm) | ||
| 2457 | ceph_put_snap_realm(mdsc, realm); | ||
| 2426 | out_err: | 2458 | out_err: |
| 2427 | mutex_lock(&mdsc->mutex); | 2459 | mutex_lock(&mdsc->mutex); |
| 2428 | if (!req->r_aborted) { | 2460 | if (!req->r_aborted) { |
| @@ -2487,6 +2519,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, | |||
| 2487 | dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); | 2519 | dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); |
| 2488 | BUG_ON(req->r_err); | 2520 | BUG_ON(req->r_err); |
| 2489 | BUG_ON(req->r_got_result); | 2521 | BUG_ON(req->r_got_result); |
| 2522 | req->r_attempts = 0; | ||
| 2490 | req->r_num_fwd = fwd_seq; | 2523 | req->r_num_fwd = fwd_seq; |
| 2491 | req->r_resend_mds = next_mds; | 2524 | req->r_resend_mds = next_mds; |
| 2492 | put_request_session(req); | 2525 | put_request_session(req); |
| @@ -2580,6 +2613,14 @@ static void handle_session(struct ceph_mds_session *session, | |||
| 2580 | send_flushmsg_ack(mdsc, session, seq); | 2613 | send_flushmsg_ack(mdsc, session, seq); |
| 2581 | break; | 2614 | break; |
| 2582 | 2615 | ||
| 2616 | case CEPH_SESSION_FORCE_RO: | ||
| 2617 | dout("force_session_readonly %p\n", session); | ||
| 2618 | spin_lock(&session->s_cap_lock); | ||
| 2619 | session->s_readonly = true; | ||
| 2620 | spin_unlock(&session->s_cap_lock); | ||
| 2621 | wake_up_session_caps(session, 0); | ||
| 2622 | break; | ||
| 2623 | |||
| 2583 | default: | 2624 | default: |
| 2584 | pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); | 2625 | pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); |
| 2585 | WARN_ON(1); | 2626 | WARN_ON(1); |
| @@ -2610,6 +2651,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
| 2610 | struct ceph_mds_session *session) | 2651 | struct ceph_mds_session *session) |
| 2611 | { | 2652 | { |
| 2612 | struct ceph_mds_request *req, *nreq; | 2653 | struct ceph_mds_request *req, *nreq; |
| 2654 | struct rb_node *p; | ||
| 2613 | int err; | 2655 | int err; |
| 2614 | 2656 | ||
| 2615 | dout("replay_unsafe_requests mds%d\n", session->s_mds); | 2657 | dout("replay_unsafe_requests mds%d\n", session->s_mds); |
| @@ -2622,6 +2664,28 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
| 2622 | ceph_con_send(&session->s_con, req->r_request); | 2664 | ceph_con_send(&session->s_con, req->r_request); |
| 2623 | } | 2665 | } |
| 2624 | } | 2666 | } |
| 2667 | |||
| 2668 | /* | ||
| 2669 | * also re-send old requests when MDS enters reconnect stage. So that MDS | ||
| 2670 | * can process completed request in clientreplay stage. | ||
| 2671 | */ | ||
| 2672 | p = rb_first(&mdsc->request_tree); | ||
| 2673 | while (p) { | ||
| 2674 | req = rb_entry(p, struct ceph_mds_request, r_node); | ||
| 2675 | p = rb_next(p); | ||
| 2676 | if (req->r_got_unsafe) | ||
| 2677 | continue; | ||
| 2678 | if (req->r_attempts == 0) | ||
| 2679 | continue; /* only old requests */ | ||
| 2680 | if (req->r_session && | ||
| 2681 | req->r_session->s_mds == session->s_mds) { | ||
| 2682 | err = __prepare_send_request(mdsc, req, session->s_mds); | ||
| 2683 | if (!err) { | ||
| 2684 | ceph_msg_get(req->r_request); | ||
| 2685 | ceph_con_send(&session->s_con, req->r_request); | ||
| 2686 | } | ||
| 2687 | } | ||
| 2688 | } | ||
| 2625 | mutex_unlock(&mdsc->mutex); | 2689 | mutex_unlock(&mdsc->mutex); |
| 2626 | } | 2690 | } |
| 2627 | 2691 | ||
| @@ -2787,6 +2851,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2787 | spin_unlock(&session->s_gen_ttl_lock); | 2851 | spin_unlock(&session->s_gen_ttl_lock); |
| 2788 | 2852 | ||
| 2789 | spin_lock(&session->s_cap_lock); | 2853 | spin_lock(&session->s_cap_lock); |
| 2854 | /* don't know if session is readonly */ | ||
| 2855 | session->s_readonly = 0; | ||
| 2790 | /* | 2856 | /* |
| 2791 | * notify __ceph_remove_cap() that we are composing cap reconnect. | 2857 | * notify __ceph_remove_cap() that we are composing cap reconnect. |
| 2792 | * If a cap get released before being added to the cap reconnect, | 2858 | * If a cap get released before being added to the cap reconnect, |
| @@ -2933,9 +2999,6 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
| 2933 | mutex_unlock(&s->s_mutex); | 2999 | mutex_unlock(&s->s_mutex); |
| 2934 | s->s_state = CEPH_MDS_SESSION_RESTARTING; | 3000 | s->s_state = CEPH_MDS_SESSION_RESTARTING; |
| 2935 | } | 3001 | } |
| 2936 | |||
| 2937 | /* kick any requests waiting on the recovering mds */ | ||
| 2938 | kick_requests(mdsc, i); | ||
| 2939 | } else if (oldstate == newstate) { | 3002 | } else if (oldstate == newstate) { |
| 2940 | continue; /* nothing new with this mds */ | 3003 | continue; /* nothing new with this mds */ |
| 2941 | } | 3004 | } |
| @@ -3295,6 +3358,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
| 3295 | init_waitqueue_head(&mdsc->session_close_wq); | 3358 | init_waitqueue_head(&mdsc->session_close_wq); |
| 3296 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 3359 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
| 3297 | mdsc->sessions = NULL; | 3360 | mdsc->sessions = NULL; |
| 3361 | atomic_set(&mdsc->num_sessions, 0); | ||
| 3298 | mdsc->max_sessions = 0; | 3362 | mdsc->max_sessions = 0; |
| 3299 | mdsc->stopping = 0; | 3363 | mdsc->stopping = 0; |
| 3300 | init_rwsem(&mdsc->snap_rwsem); | 3364 | init_rwsem(&mdsc->snap_rwsem); |
| @@ -3428,14 +3492,17 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3428 | dout("sync\n"); | 3492 | dout("sync\n"); |
| 3429 | mutex_lock(&mdsc->mutex); | 3493 | mutex_lock(&mdsc->mutex); |
| 3430 | want_tid = mdsc->last_tid; | 3494 | want_tid = mdsc->last_tid; |
| 3431 | want_flush = mdsc->cap_flush_seq; | ||
| 3432 | mutex_unlock(&mdsc->mutex); | 3495 | mutex_unlock(&mdsc->mutex); |
| 3433 | dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); | ||
| 3434 | 3496 | ||
| 3435 | ceph_flush_dirty_caps(mdsc); | 3497 | ceph_flush_dirty_caps(mdsc); |
| 3498 | spin_lock(&mdsc->cap_dirty_lock); | ||
| 3499 | want_flush = mdsc->cap_flush_seq; | ||
| 3500 | spin_unlock(&mdsc->cap_dirty_lock); | ||
| 3501 | |||
| 3502 | dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); | ||
| 3436 | 3503 | ||
| 3437 | wait_unsafe_requests(mdsc, want_tid); | 3504 | wait_unsafe_requests(mdsc, want_tid); |
| 3438 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); | 3505 | wait_caps_flush(mdsc, want_flush); |
| 3439 | } | 3506 | } |
| 3440 | 3507 | ||
| 3441 | /* | 3508 | /* |
| @@ -3443,17 +3510,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3443 | */ | 3510 | */ |
| 3444 | static bool done_closing_sessions(struct ceph_mds_client *mdsc) | 3511 | static bool done_closing_sessions(struct ceph_mds_client *mdsc) |
| 3445 | { | 3512 | { |
| 3446 | int i, n = 0; | ||
| 3447 | |||
| 3448 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | 3513 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
| 3449 | return true; | 3514 | return true; |
| 3450 | 3515 | return atomic_read(&mdsc->num_sessions) == 0; | |
| 3451 | mutex_lock(&mdsc->mutex); | ||
| 3452 | for (i = 0; i < mdsc->max_sessions; i++) | ||
| 3453 | if (mdsc->sessions[i]) | ||
| 3454 | n++; | ||
| 3455 | mutex_unlock(&mdsc->mutex); | ||
| 3456 | return n == 0; | ||
| 3457 | } | 3516 | } |
| 3458 | 3517 | ||
| 3459 | /* | 3518 | /* |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e2817d00f7d9..1875b5d985c6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -137,6 +137,7 @@ struct ceph_mds_session { | |||
| 137 | int s_nr_caps, s_trim_caps; | 137 | int s_nr_caps, s_trim_caps; |
| 138 | int s_num_cap_releases; | 138 | int s_num_cap_releases; |
| 139 | int s_cap_reconnect; | 139 | int s_cap_reconnect; |
| 140 | int s_readonly; | ||
| 140 | struct list_head s_cap_releases; /* waiting cap_release messages */ | 141 | struct list_head s_cap_releases; /* waiting cap_release messages */ |
| 141 | struct list_head s_cap_releases_done; /* ready to send */ | 142 | struct list_head s_cap_releases_done; /* ready to send */ |
| 142 | struct ceph_cap *s_cap_iterator; | 143 | struct ceph_cap *s_cap_iterator; |
| @@ -272,6 +273,7 @@ struct ceph_mds_client { | |||
| 272 | struct list_head waiting_for_map; | 273 | struct list_head waiting_for_map; |
| 273 | 274 | ||
| 274 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ | 275 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ |
| 276 | atomic_t num_sessions; | ||
| 275 | int max_sessions; /* len of s_mds_sessions */ | 277 | int max_sessions; /* len of s_mds_sessions */ |
| 276 | int stopping; /* true if shutting down */ | 278 | int stopping; /* true if shutting down */ |
| 277 | 279 | ||
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index ce35fbd4ba5d..a97e39f09ba6 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -70,13 +70,11 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, | |||
| 70 | * safe. we do need to protect against concurrent empty list | 70 | * safe. we do need to protect against concurrent empty list |
| 71 | * additions, however. | 71 | * additions, however. |
| 72 | */ | 72 | */ |
| 73 | if (atomic_read(&realm->nref) == 0) { | 73 | if (atomic_inc_return(&realm->nref) == 1) { |
| 74 | spin_lock(&mdsc->snap_empty_lock); | 74 | spin_lock(&mdsc->snap_empty_lock); |
| 75 | list_del_init(&realm->empty_item); | 75 | list_del_init(&realm->empty_item); |
| 76 | spin_unlock(&mdsc->snap_empty_lock); | 76 | spin_unlock(&mdsc->snap_empty_lock); |
| 77 | } | 77 | } |
| 78 | |||
| 79 | atomic_inc(&realm->nref); | ||
| 80 | } | 78 | } |
| 81 | 79 | ||
| 82 | static void __insert_snap_realm(struct rb_root *root, | 80 | static void __insert_snap_realm(struct rb_root *root, |
| @@ -116,7 +114,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
| 116 | if (!realm) | 114 | if (!realm) |
| 117 | return ERR_PTR(-ENOMEM); | 115 | return ERR_PTR(-ENOMEM); |
| 118 | 116 | ||
| 119 | atomic_set(&realm->nref, 0); /* tree does not take a ref */ | 117 | atomic_set(&realm->nref, 1); /* for caller */ |
| 120 | realm->ino = ino; | 118 | realm->ino = ino; |
| 121 | INIT_LIST_HEAD(&realm->children); | 119 | INIT_LIST_HEAD(&realm->children); |
| 122 | INIT_LIST_HEAD(&realm->child_item); | 120 | INIT_LIST_HEAD(&realm->child_item); |
| @@ -134,8 +132,8 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
| 134 | * | 132 | * |
| 135 | * caller must hold snap_rwsem for write. | 133 | * caller must hold snap_rwsem for write. |
| 136 | */ | 134 | */ |
| 137 | struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | 135 | static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, |
| 138 | u64 ino) | 136 | u64 ino) |
| 139 | { | 137 | { |
| 140 | struct rb_node *n = mdsc->snap_realms.rb_node; | 138 | struct rb_node *n = mdsc->snap_realms.rb_node; |
| 141 | struct ceph_snap_realm *r; | 139 | struct ceph_snap_realm *r; |
| @@ -154,6 +152,16 @@ struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | |||
| 154 | return NULL; | 152 | return NULL; |
| 155 | } | 153 | } |
| 156 | 154 | ||
| 155 | struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | ||
| 156 | u64 ino) | ||
| 157 | { | ||
| 158 | struct ceph_snap_realm *r; | ||
| 159 | r = __lookup_snap_realm(mdsc, ino); | ||
| 160 | if (r) | ||
| 161 | ceph_get_snap_realm(mdsc, r); | ||
| 162 | return r; | ||
| 163 | } | ||
| 164 | |||
| 157 | static void __put_snap_realm(struct ceph_mds_client *mdsc, | 165 | static void __put_snap_realm(struct ceph_mds_client *mdsc, |
| 158 | struct ceph_snap_realm *realm); | 166 | struct ceph_snap_realm *realm); |
| 159 | 167 | ||
| @@ -273,7 +281,6 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, | |||
| 273 | } | 281 | } |
| 274 | realm->parent_ino = parentino; | 282 | realm->parent_ino = parentino; |
| 275 | realm->parent = parent; | 283 | realm->parent = parent; |
| 276 | ceph_get_snap_realm(mdsc, parent); | ||
| 277 | list_add(&realm->child_item, &parent->children); | 284 | list_add(&realm->child_item, &parent->children); |
| 278 | return 1; | 285 | return 1; |
| 279 | } | 286 | } |
| @@ -631,12 +638,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | |||
| 631 | * Caller must hold snap_rwsem for write. | 638 | * Caller must hold snap_rwsem for write. |
| 632 | */ | 639 | */ |
| 633 | int ceph_update_snap_trace(struct ceph_mds_client *mdsc, | 640 | int ceph_update_snap_trace(struct ceph_mds_client *mdsc, |
| 634 | void *p, void *e, bool deletion) | 641 | void *p, void *e, bool deletion, |
| 642 | struct ceph_snap_realm **realm_ret) | ||
| 635 | { | 643 | { |
| 636 | struct ceph_mds_snap_realm *ri; /* encoded */ | 644 | struct ceph_mds_snap_realm *ri; /* encoded */ |
| 637 | __le64 *snaps; /* encoded */ | 645 | __le64 *snaps; /* encoded */ |
| 638 | __le64 *prior_parent_snaps; /* encoded */ | 646 | __le64 *prior_parent_snaps; /* encoded */ |
| 639 | struct ceph_snap_realm *realm; | 647 | struct ceph_snap_realm *realm = NULL; |
| 648 | struct ceph_snap_realm *first_realm = NULL; | ||
| 640 | int invalidate = 0; | 649 | int invalidate = 0; |
| 641 | int err = -ENOMEM; | 650 | int err = -ENOMEM; |
| 642 | LIST_HEAD(dirty_realms); | 651 | LIST_HEAD(dirty_realms); |
| @@ -704,13 +713,18 @@ more: | |||
| 704 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, | 713 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
| 705 | realm, invalidate, p, e); | 714 | realm, invalidate, p, e); |
| 706 | 715 | ||
| 707 | if (p < e) | ||
| 708 | goto more; | ||
| 709 | |||
| 710 | /* invalidate when we reach the _end_ (root) of the trace */ | 716 | /* invalidate when we reach the _end_ (root) of the trace */ |
| 711 | if (invalidate) | 717 | if (invalidate && p >= e) |
| 712 | rebuild_snap_realms(realm); | 718 | rebuild_snap_realms(realm); |
| 713 | 719 | ||
| 720 | if (!first_realm) | ||
| 721 | first_realm = realm; | ||
| 722 | else | ||
| 723 | ceph_put_snap_realm(mdsc, realm); | ||
| 724 | |||
| 725 | if (p < e) | ||
| 726 | goto more; | ||
| 727 | |||
| 714 | /* | 728 | /* |
| 715 | * queue cap snaps _after_ we've built the new snap contexts, | 729 | * queue cap snaps _after_ we've built the new snap contexts, |
| 716 | * so that i_head_snapc can be set appropriately. | 730 | * so that i_head_snapc can be set appropriately. |
| @@ -721,12 +735,21 @@ more: | |||
| 721 | queue_realm_cap_snaps(realm); | 735 | queue_realm_cap_snaps(realm); |
| 722 | } | 736 | } |
| 723 | 737 | ||
| 738 | if (realm_ret) | ||
| 739 | *realm_ret = first_realm; | ||
| 740 | else | ||
| 741 | ceph_put_snap_realm(mdsc, first_realm); | ||
| 742 | |||
| 724 | __cleanup_empty_realms(mdsc); | 743 | __cleanup_empty_realms(mdsc); |
| 725 | return 0; | 744 | return 0; |
| 726 | 745 | ||
| 727 | bad: | 746 | bad: |
| 728 | err = -EINVAL; | 747 | err = -EINVAL; |
| 729 | fail: | 748 | fail: |
| 749 | if (realm && !IS_ERR(realm)) | ||
| 750 | ceph_put_snap_realm(mdsc, realm); | ||
| 751 | if (first_realm) | ||
| 752 | ceph_put_snap_realm(mdsc, first_realm); | ||
| 730 | pr_err("update_snap_trace error %d\n", err); | 753 | pr_err("update_snap_trace error %d\n", err); |
| 731 | return err; | 754 | return err; |
| 732 | } | 755 | } |
| @@ -844,7 +867,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
| 844 | if (IS_ERR(realm)) | 867 | if (IS_ERR(realm)) |
| 845 | goto out; | 868 | goto out; |
| 846 | } | 869 | } |
| 847 | ceph_get_snap_realm(mdsc, realm); | ||
| 848 | 870 | ||
| 849 | dout("splitting snap_realm %llx %p\n", realm->ino, realm); | 871 | dout("splitting snap_realm %llx %p\n", realm->ino, realm); |
| 850 | for (i = 0; i < num_split_inos; i++) { | 872 | for (i = 0; i < num_split_inos; i++) { |
| @@ -905,7 +927,7 @@ skip_inode: | |||
| 905 | /* we may have taken some of the old realm's children. */ | 927 | /* we may have taken some of the old realm's children. */ |
| 906 | for (i = 0; i < num_split_realms; i++) { | 928 | for (i = 0; i < num_split_realms; i++) { |
| 907 | struct ceph_snap_realm *child = | 929 | struct ceph_snap_realm *child = |
| 908 | ceph_lookup_snap_realm(mdsc, | 930 | __lookup_snap_realm(mdsc, |
| 909 | le64_to_cpu(split_realms[i])); | 931 | le64_to_cpu(split_realms[i])); |
| 910 | if (!child) | 932 | if (!child) |
| 911 | continue; | 933 | continue; |
| @@ -918,7 +940,7 @@ skip_inode: | |||
| 918 | * snap, we can avoid queueing cap_snaps. | 940 | * snap, we can avoid queueing cap_snaps. |
| 919 | */ | 941 | */ |
| 920 | ceph_update_snap_trace(mdsc, p, e, | 942 | ceph_update_snap_trace(mdsc, p, e, |
| 921 | op == CEPH_SNAP_OP_DESTROY); | 943 | op == CEPH_SNAP_OP_DESTROY, NULL); |
| 922 | 944 | ||
| 923 | if (op == CEPH_SNAP_OP_SPLIT) | 945 | if (op == CEPH_SNAP_OP_SPLIT) |
| 924 | /* we took a reference when we created the realm, above */ | 946 | /* we took a reference when we created the realm, above */ |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 5ae62587a71d..a63997b8bcff 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -414,6 +414,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
| 414 | seq_puts(m, ",noshare"); | 414 | seq_puts(m, ",noshare"); |
| 415 | if (opt->flags & CEPH_OPT_NOCRC) | 415 | if (opt->flags & CEPH_OPT_NOCRC) |
| 416 | seq_puts(m, ",nocrc"); | 416 | seq_puts(m, ",nocrc"); |
| 417 | if (opt->flags & CEPH_OPT_NOMSGAUTH) | ||
| 418 | seq_puts(m, ",nocephx_require_signatures"); | ||
| 419 | if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) | ||
| 420 | seq_puts(m, ",notcp_nodelay"); | ||
| 417 | 421 | ||
| 418 | if (opt->name) | 422 | if (opt->name) |
| 419 | seq_printf(m, ",name=%s", opt->name); | 423 | seq_printf(m, ",name=%s", opt->name); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e1aa32d0759d..04c8124ed30e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -693,7 +693,8 @@ extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc, | |||
| 693 | extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc, | 693 | extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc, |
| 694 | struct ceph_snap_realm *realm); | 694 | struct ceph_snap_realm *realm); |
| 695 | extern int ceph_update_snap_trace(struct ceph_mds_client *m, | 695 | extern int ceph_update_snap_trace(struct ceph_mds_client *m, |
| 696 | void *p, void *e, bool deletion); | 696 | void *p, void *e, bool deletion, |
| 697 | struct ceph_snap_realm **realm_ret); | ||
| 697 | extern void ceph_handle_snap(struct ceph_mds_client *mdsc, | 698 | extern void ceph_handle_snap(struct ceph_mds_client *mdsc, |
| 698 | struct ceph_mds_session *session, | 699 | struct ceph_mds_session *session, |
| 699 | struct ceph_msg *msg); | 700 | struct ceph_msg *msg); |
| @@ -892,7 +893,9 @@ extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, | |||
| 892 | int ceph_uninline_data(struct file *filp, struct page *locked_page); | 893 | int ceph_uninline_data(struct file *filp, struct page *locked_page); |
| 893 | /* dir.c */ | 894 | /* dir.c */ |
| 894 | extern const struct file_operations ceph_dir_fops; | 895 | extern const struct file_operations ceph_dir_fops; |
| 896 | extern const struct file_operations ceph_snapdir_fops; | ||
| 895 | extern const struct inode_operations ceph_dir_iops; | 897 | extern const struct inode_operations ceph_dir_iops; |
| 898 | extern const struct inode_operations ceph_snapdir_iops; | ||
| 896 | extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, | 899 | extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, |
| 897 | ceph_snapdir_dentry_ops; | 900 | ceph_snapdir_dentry_ops; |
| 898 | 901 | ||
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c0dadaac26e3..31eb03d0c766 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
| @@ -158,17 +158,6 @@ enum { | |||
| 158 | }; | 158 | }; |
| 159 | 159 | ||
| 160 | 160 | ||
| 161 | /* pool operations */ | ||
| 162 | enum { | ||
| 163 | POOL_OP_CREATE = 0x01, | ||
| 164 | POOL_OP_DELETE = 0x02, | ||
| 165 | POOL_OP_AUID_CHANGE = 0x03, | ||
| 166 | POOL_OP_CREATE_SNAP = 0x11, | ||
| 167 | POOL_OP_DELETE_SNAP = 0x12, | ||
| 168 | POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, | ||
| 169 | POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, | ||
| 170 | }; | ||
| 171 | |||
| 172 | struct ceph_mon_request_header { | 161 | struct ceph_mon_request_header { |
| 173 | __le64 have_version; | 162 | __le64 have_version; |
| 174 | __le16 session_mon; | 163 | __le16 session_mon; |
| @@ -191,31 +180,6 @@ struct ceph_mon_statfs_reply { | |||
| 191 | struct ceph_statfs st; | 180 | struct ceph_statfs st; |
| 192 | } __attribute__ ((packed)); | 181 | } __attribute__ ((packed)); |
| 193 | 182 | ||
| 194 | const char *ceph_pool_op_name(int op); | ||
| 195 | |||
| 196 | struct ceph_mon_poolop { | ||
| 197 | struct ceph_mon_request_header monhdr; | ||
| 198 | struct ceph_fsid fsid; | ||
| 199 | __le32 pool; | ||
| 200 | __le32 op; | ||
| 201 | __le64 auid; | ||
| 202 | __le64 snapid; | ||
| 203 | __le32 name_len; | ||
| 204 | } __attribute__ ((packed)); | ||
| 205 | |||
| 206 | struct ceph_mon_poolop_reply { | ||
| 207 | struct ceph_mon_request_header monhdr; | ||
| 208 | struct ceph_fsid fsid; | ||
| 209 | __le32 reply_code; | ||
| 210 | __le32 epoch; | ||
| 211 | char has_data; | ||
| 212 | char data[0]; | ||
| 213 | } __attribute__ ((packed)); | ||
| 214 | |||
| 215 | struct ceph_mon_unmanaged_snap { | ||
| 216 | __le64 snapid; | ||
| 217 | } __attribute__ ((packed)); | ||
| 218 | |||
| 219 | struct ceph_osd_getmap { | 183 | struct ceph_osd_getmap { |
| 220 | struct ceph_mon_request_header monhdr; | 184 | struct ceph_mon_request_header monhdr; |
| 221 | struct ceph_fsid fsid; | 185 | struct ceph_fsid fsid; |
| @@ -307,6 +271,7 @@ enum { | |||
| 307 | CEPH_SESSION_RECALL_STATE, | 271 | CEPH_SESSION_RECALL_STATE, |
| 308 | CEPH_SESSION_FLUSHMSG, | 272 | CEPH_SESSION_FLUSHMSG, |
| 309 | CEPH_SESSION_FLUSHMSG_ACK, | 273 | CEPH_SESSION_FLUSHMSG_ACK, |
| 274 | CEPH_SESSION_FORCE_RO, | ||
| 310 | }; | 275 | }; |
| 311 | 276 | ||
| 312 | extern const char *ceph_session_op_name(int op); | 277 | extern const char *ceph_session_op_name(int op); |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 8b11a79ca1cb..16fff9608848 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
| @@ -30,8 +30,9 @@ | |||
| 30 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | 30 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ |
| 31 | #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ | 31 | #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ |
| 32 | #define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ | 32 | #define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ |
| 33 | #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ | ||
| 33 | 34 | ||
| 34 | #define CEPH_OPT_DEFAULT (0) | 35 | #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) |
| 35 | 36 | ||
| 36 | #define ceph_set_opt(client, opt) \ | 37 | #define ceph_set_opt(client, opt) \ |
| 37 | (client)->options->flags |= CEPH_OPT_##opt; | 38 | (client)->options->flags |= CEPH_OPT_##opt; |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index d9d396c16503..e15499422fdc 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -57,6 +57,7 @@ struct ceph_messenger { | |||
| 57 | 57 | ||
| 58 | atomic_t stopping; | 58 | atomic_t stopping; |
| 59 | bool nocrc; | 59 | bool nocrc; |
| 60 | bool tcp_nodelay; | ||
| 60 | 61 | ||
| 61 | /* | 62 | /* |
| 62 | * the global_seq counts connections i (attempt to) initiate | 63 | * the global_seq counts connections i (attempt to) initiate |
| @@ -264,7 +265,8 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, | |||
| 264 | struct ceph_entity_addr *myaddr, | 265 | struct ceph_entity_addr *myaddr, |
| 265 | u64 supported_features, | 266 | u64 supported_features, |
| 266 | u64 required_features, | 267 | u64 required_features, |
| 267 | bool nocrc); | 268 | bool nocrc, |
| 269 | bool tcp_nodelay); | ||
| 268 | 270 | ||
| 269 | extern void ceph_con_init(struct ceph_connection *con, void *private, | 271 | extern void ceph_con_init(struct ceph_connection *con, void *private, |
| 270 | const struct ceph_connection_operations *ops, | 272 | const struct ceph_connection_operations *ops, |
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index deb47e45ac7c..81810dc21f06 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h | |||
| @@ -40,7 +40,7 @@ struct ceph_mon_request { | |||
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | /* | 42 | /* |
| 43 | * ceph_mon_generic_request is being used for the statfs, poolop and | 43 | * ceph_mon_generic_request is being used for the statfs and |
| 44 | * mon_get_version requests which are being done a bit differently | 44 | * mon_get_version requests which are being done a bit differently |
| 45 | * because we need to get data back to the caller | 45 | * because we need to get data back to the caller |
| 46 | */ | 46 | */ |
| @@ -50,7 +50,6 @@ struct ceph_mon_generic_request { | |||
| 50 | struct rb_node node; | 50 | struct rb_node node; |
| 51 | int result; | 51 | int result; |
| 52 | void *buf; | 52 | void *buf; |
| 53 | int buf_len; | ||
| 54 | struct completion completion; | 53 | struct completion completion; |
| 55 | struct ceph_msg *request; /* original request */ | 54 | struct ceph_msg *request; /* original request */ |
| 56 | struct ceph_msg *reply; /* and reply */ | 55 | struct ceph_msg *reply; /* and reply */ |
| @@ -117,10 +116,4 @@ extern int ceph_monc_open_session(struct ceph_mon_client *monc); | |||
| 117 | 116 | ||
| 118 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); | 117 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); |
| 119 | 118 | ||
| 120 | extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
| 121 | u32 pool, u64 *snapid); | ||
| 122 | |||
| 123 | extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
| 124 | u32 pool, u64 snapid); | ||
| 125 | |||
| 126 | #endif | 119 | #endif |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 5d5ab67f516d..ec565508e904 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
| @@ -239,6 +239,8 @@ enum { | |||
| 239 | Opt_nocrc, | 239 | Opt_nocrc, |
| 240 | Opt_cephx_require_signatures, | 240 | Opt_cephx_require_signatures, |
| 241 | Opt_nocephx_require_signatures, | 241 | Opt_nocephx_require_signatures, |
| 242 | Opt_tcp_nodelay, | ||
| 243 | Opt_notcp_nodelay, | ||
| 242 | }; | 244 | }; |
| 243 | 245 | ||
| 244 | static match_table_t opt_tokens = { | 246 | static match_table_t opt_tokens = { |
| @@ -259,6 +261,8 @@ static match_table_t opt_tokens = { | |||
| 259 | {Opt_nocrc, "nocrc"}, | 261 | {Opt_nocrc, "nocrc"}, |
| 260 | {Opt_cephx_require_signatures, "cephx_require_signatures"}, | 262 | {Opt_cephx_require_signatures, "cephx_require_signatures"}, |
| 261 | {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, | 263 | {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, |
| 264 | {Opt_tcp_nodelay, "tcp_nodelay"}, | ||
| 265 | {Opt_notcp_nodelay, "notcp_nodelay"}, | ||
| 262 | {-1, NULL} | 266 | {-1, NULL} |
| 263 | }; | 267 | }; |
| 264 | 268 | ||
| @@ -457,6 +461,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
| 457 | case Opt_nocrc: | 461 | case Opt_nocrc: |
| 458 | opt->flags |= CEPH_OPT_NOCRC; | 462 | opt->flags |= CEPH_OPT_NOCRC; |
| 459 | break; | 463 | break; |
| 464 | |||
| 460 | case Opt_cephx_require_signatures: | 465 | case Opt_cephx_require_signatures: |
| 461 | opt->flags &= ~CEPH_OPT_NOMSGAUTH; | 466 | opt->flags &= ~CEPH_OPT_NOMSGAUTH; |
| 462 | break; | 467 | break; |
| @@ -464,6 +469,13 @@ ceph_parse_options(char *options, const char *dev_name, | |||
| 464 | opt->flags |= CEPH_OPT_NOMSGAUTH; | 469 | opt->flags |= CEPH_OPT_NOMSGAUTH; |
| 465 | break; | 470 | break; |
| 466 | 471 | ||
| 472 | case Opt_tcp_nodelay: | ||
| 473 | opt->flags |= CEPH_OPT_TCP_NODELAY; | ||
| 474 | break; | ||
| 475 | case Opt_notcp_nodelay: | ||
| 476 | opt->flags &= ~CEPH_OPT_TCP_NODELAY; | ||
| 477 | break; | ||
| 478 | |||
| 467 | default: | 479 | default: |
| 468 | BUG_ON(token); | 480 | BUG_ON(token); |
| 469 | } | 481 | } |
| @@ -518,10 +530,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, | |||
| 518 | /* msgr */ | 530 | /* msgr */ |
| 519 | if (ceph_test_opt(client, MYIP)) | 531 | if (ceph_test_opt(client, MYIP)) |
| 520 | myaddr = &client->options->my_addr; | 532 | myaddr = &client->options->my_addr; |
| 533 | |||
| 521 | ceph_messenger_init(&client->msgr, myaddr, | 534 | ceph_messenger_init(&client->msgr, myaddr, |
| 522 | client->supported_features, | 535 | client->supported_features, |
| 523 | client->required_features, | 536 | client->required_features, |
| 524 | ceph_test_opt(client, NOCRC)); | 537 | ceph_test_opt(client, NOCRC), |
| 538 | ceph_test_opt(client, TCP_NODELAY)); | ||
| 525 | 539 | ||
| 526 | /* subsystems */ | 540 | /* subsystems */ |
| 527 | err = ceph_monc_init(&client->monc, client); | 541 | err = ceph_monc_init(&client->monc, client); |
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 30560202f57b..139a9cb19b0c 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c | |||
| @@ -42,17 +42,3 @@ const char *ceph_osd_state_name(int s) | |||
| 42 | return "???"; | 42 | return "???"; |
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | |||
| 46 | const char *ceph_pool_op_name(int op) | ||
| 47 | { | ||
| 48 | switch (op) { | ||
| 49 | case POOL_OP_CREATE: return "create"; | ||
| 50 | case POOL_OP_DELETE: return "delete"; | ||
| 51 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
| 52 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
| 53 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
| 54 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
| 55 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
| 56 | } | ||
| 57 | return "???"; | ||
| 58 | } | ||
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index d2d525529f87..14d9995097cc 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c | |||
| @@ -127,8 +127,6 @@ static int monc_show(struct seq_file *s, void *p) | |||
| 127 | op = le16_to_cpu(req->request->hdr.type); | 127 | op = le16_to_cpu(req->request->hdr.type); |
| 128 | if (op == CEPH_MSG_STATFS) | 128 | if (op == CEPH_MSG_STATFS) |
| 129 | seq_printf(s, "%llu statfs\n", req->tid); | 129 | seq_printf(s, "%llu statfs\n", req->tid); |
| 130 | else if (op == CEPH_MSG_POOLOP) | ||
| 131 | seq_printf(s, "%llu poolop\n", req->tid); | ||
| 132 | else if (op == CEPH_MSG_MON_GET_VERSION) | 130 | else if (op == CEPH_MSG_MON_GET_VERSION) |
| 133 | seq_printf(s, "%llu mon_get_version", req->tid); | 131 | seq_printf(s, "%llu mon_get_version", req->tid); |
| 134 | else | 132 | else |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 33a2f201e460..6b3f54ed65ba 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -510,6 +510,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) | |||
| 510 | return ret; | 510 | return ret; |
| 511 | } | 511 | } |
| 512 | 512 | ||
| 513 | if (con->msgr->tcp_nodelay) { | ||
| 514 | int optval = 1; | ||
| 515 | |||
| 516 | ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, | ||
| 517 | (char *)&optval, sizeof(optval)); | ||
| 518 | if (ret) | ||
| 519 | pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", | ||
| 520 | ret); | ||
| 521 | } | ||
| 522 | |||
| 513 | sk_set_memalloc(sock->sk); | 523 | sk_set_memalloc(sock->sk); |
| 514 | 524 | ||
| 515 | con->sock = sock; | 525 | con->sock = sock; |
| @@ -2922,7 +2932,8 @@ void ceph_messenger_init(struct ceph_messenger *msgr, | |||
| 2922 | struct ceph_entity_addr *myaddr, | 2932 | struct ceph_entity_addr *myaddr, |
| 2923 | u64 supported_features, | 2933 | u64 supported_features, |
| 2924 | u64 required_features, | 2934 | u64 required_features, |
| 2925 | bool nocrc) | 2935 | bool nocrc, |
| 2936 | bool tcp_nodelay) | ||
| 2926 | { | 2937 | { |
| 2927 | msgr->supported_features = supported_features; | 2938 | msgr->supported_features = supported_features; |
| 2928 | msgr->required_features = required_features; | 2939 | msgr->required_features = required_features; |
| @@ -2937,6 +2948,7 @@ void ceph_messenger_init(struct ceph_messenger *msgr, | |||
| 2937 | get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); | 2948 | get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); |
| 2938 | encode_my_addr(msgr); | 2949 | encode_my_addr(msgr); |
| 2939 | msgr->nocrc = nocrc; | 2950 | msgr->nocrc = nocrc; |
| 2951 | msgr->tcp_nodelay = tcp_nodelay; | ||
| 2940 | 2952 | ||
| 2941 | atomic_set(&msgr->stopping, 0); | 2953 | atomic_set(&msgr->stopping, 0); |
| 2942 | 2954 | ||
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index f2148e22b148..2b3cf05e87b0 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -410,7 +410,7 @@ out_unlocked: | |||
| 410 | } | 410 | } |
| 411 | 411 | ||
| 412 | /* | 412 | /* |
| 413 | * generic requests (e.g., statfs, poolop) | 413 | * generic requests (currently statfs, mon_get_version) |
| 414 | */ | 414 | */ |
| 415 | static struct ceph_mon_generic_request *__lookup_generic_req( | 415 | static struct ceph_mon_generic_request *__lookup_generic_req( |
| 416 | struct ceph_mon_client *monc, u64 tid) | 416 | struct ceph_mon_client *monc, u64 tid) |
| @@ -569,7 +569,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, | |||
| 569 | return; | 569 | return; |
| 570 | 570 | ||
| 571 | bad: | 571 | bad: |
| 572 | pr_err("corrupt generic reply, tid %llu\n", tid); | 572 | pr_err("corrupt statfs reply, tid %llu\n", tid); |
| 573 | ceph_msg_dump(msg); | 573 | ceph_msg_dump(msg); |
| 574 | } | 574 | } |
| 575 | 575 | ||
| @@ -588,7 +588,6 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
| 588 | 588 | ||
| 589 | kref_init(&req->kref); | 589 | kref_init(&req->kref); |
| 590 | req->buf = buf; | 590 | req->buf = buf; |
| 591 | req->buf_len = sizeof(*buf); | ||
| 592 | init_completion(&req->completion); | 591 | init_completion(&req->completion); |
| 593 | 592 | ||
| 594 | err = -ENOMEM; | 593 | err = -ENOMEM; |
| @@ -611,7 +610,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
| 611 | err = do_generic_request(monc, req); | 610 | err = do_generic_request(monc, req); |
| 612 | 611 | ||
| 613 | out: | 612 | out: |
| 614 | kref_put(&req->kref, release_generic_request); | 613 | put_generic_request(req); |
| 615 | return err; | 614 | return err; |
| 616 | } | 615 | } |
| 617 | EXPORT_SYMBOL(ceph_monc_do_statfs); | 616 | EXPORT_SYMBOL(ceph_monc_do_statfs); |
| @@ -647,7 +646,7 @@ static void handle_get_version_reply(struct ceph_mon_client *monc, | |||
| 647 | 646 | ||
| 648 | return; | 647 | return; |
| 649 | bad: | 648 | bad: |
| 650 | pr_err("corrupt mon_get_version reply\n"); | 649 | pr_err("corrupt mon_get_version reply, tid %llu\n", tid); |
| 651 | ceph_msg_dump(msg); | 650 | ceph_msg_dump(msg); |
| 652 | } | 651 | } |
| 653 | 652 | ||
| @@ -670,7 +669,6 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, | |||
| 670 | 669 | ||
| 671 | kref_init(&req->kref); | 670 | kref_init(&req->kref); |
| 672 | req->buf = newest; | 671 | req->buf = newest; |
| 673 | req->buf_len = sizeof(*newest); | ||
| 674 | init_completion(&req->completion); | 672 | init_completion(&req->completion); |
| 675 | 673 | ||
| 676 | req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION, | 674 | req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION, |
| @@ -701,134 +699,12 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, | |||
| 701 | 699 | ||
| 702 | mutex_unlock(&monc->mutex); | 700 | mutex_unlock(&monc->mutex); |
| 703 | out: | 701 | out: |
| 704 | kref_put(&req->kref, release_generic_request); | 702 | put_generic_request(req); |
| 705 | return err; | 703 | return err; |
| 706 | } | 704 | } |
| 707 | EXPORT_SYMBOL(ceph_monc_do_get_version); | 705 | EXPORT_SYMBOL(ceph_monc_do_get_version); |
| 708 | 706 | ||
| 709 | /* | 707 | /* |
| 710 | * pool ops | ||
| 711 | */ | ||
| 712 | static int get_poolop_reply_buf(const char *src, size_t src_len, | ||
| 713 | char *dst, size_t dst_len) | ||
| 714 | { | ||
| 715 | u32 buf_len; | ||
| 716 | |||
| 717 | if (src_len != sizeof(u32) + dst_len) | ||
| 718 | return -EINVAL; | ||
| 719 | |||
| 720 | buf_len = le32_to_cpu(*(__le32 *)src); | ||
| 721 | if (buf_len != dst_len) | ||
| 722 | return -EINVAL; | ||
| 723 | |||
| 724 | memcpy(dst, src + sizeof(u32), dst_len); | ||
| 725 | return 0; | ||
| 726 | } | ||
| 727 | |||
| 728 | static void handle_poolop_reply(struct ceph_mon_client *monc, | ||
| 729 | struct ceph_msg *msg) | ||
| 730 | { | ||
| 731 | struct ceph_mon_generic_request *req; | ||
| 732 | struct ceph_mon_poolop_reply *reply = msg->front.iov_base; | ||
| 733 | u64 tid = le64_to_cpu(msg->hdr.tid); | ||
| 734 | |||
| 735 | if (msg->front.iov_len < sizeof(*reply)) | ||
| 736 | goto bad; | ||
| 737 | dout("handle_poolop_reply %p tid %llu\n", msg, tid); | ||
| 738 | |||
| 739 | mutex_lock(&monc->mutex); | ||
| 740 | req = __lookup_generic_req(monc, tid); | ||
| 741 | if (req) { | ||
| 742 | if (req->buf_len && | ||
| 743 | get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), | ||
| 744 | msg->front.iov_len - sizeof(*reply), | ||
| 745 | req->buf, req->buf_len) < 0) { | ||
| 746 | mutex_unlock(&monc->mutex); | ||
| 747 | goto bad; | ||
| 748 | } | ||
| 749 | req->result = le32_to_cpu(reply->reply_code); | ||
| 750 | get_generic_request(req); | ||
| 751 | } | ||
| 752 | mutex_unlock(&monc->mutex); | ||
| 753 | if (req) { | ||
| 754 | complete(&req->completion); | ||
| 755 | put_generic_request(req); | ||
| 756 | } | ||
| 757 | return; | ||
| 758 | |||
| 759 | bad: | ||
| 760 | pr_err("corrupt generic reply, tid %llu\n", tid); | ||
| 761 | ceph_msg_dump(msg); | ||
| 762 | } | ||
| 763 | |||
| 764 | /* | ||
| 765 | * Do a synchronous pool op. | ||
| 766 | */ | ||
| 767 | static int do_poolop(struct ceph_mon_client *monc, u32 op, | ||
| 768 | u32 pool, u64 snapid, | ||
| 769 | char *buf, int len) | ||
| 770 | { | ||
| 771 | struct ceph_mon_generic_request *req; | ||
| 772 | struct ceph_mon_poolop *h; | ||
| 773 | int err; | ||
| 774 | |||
| 775 | req = kzalloc(sizeof(*req), GFP_NOFS); | ||
| 776 | if (!req) | ||
| 777 | return -ENOMEM; | ||
| 778 | |||
| 779 | kref_init(&req->kref); | ||
| 780 | req->buf = buf; | ||
| 781 | req->buf_len = len; | ||
| 782 | init_completion(&req->completion); | ||
| 783 | |||
| 784 | err = -ENOMEM; | ||
| 785 | req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS, | ||
| 786 | true); | ||
| 787 | if (!req->request) | ||
| 788 | goto out; | ||
| 789 | req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS, | ||
| 790 | true); | ||
| 791 | if (!req->reply) | ||
| 792 | goto out; | ||
| 793 | |||
| 794 | /* fill out request */ | ||
| 795 | req->request->hdr.version = cpu_to_le16(2); | ||
| 796 | h = req->request->front.iov_base; | ||
| 797 | h->monhdr.have_version = 0; | ||
| 798 | h->monhdr.session_mon = cpu_to_le16(-1); | ||
| 799 | h->monhdr.session_mon_tid = 0; | ||
| 800 | h->fsid = monc->monmap->fsid; | ||
| 801 | h->pool = cpu_to_le32(pool); | ||
| 802 | h->op = cpu_to_le32(op); | ||
| 803 | h->auid = 0; | ||
| 804 | h->snapid = cpu_to_le64(snapid); | ||
| 805 | h->name_len = 0; | ||
| 806 | |||
| 807 | err = do_generic_request(monc, req); | ||
| 808 | |||
| 809 | out: | ||
| 810 | kref_put(&req->kref, release_generic_request); | ||
| 811 | return err; | ||
| 812 | } | ||
| 813 | |||
| 814 | int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
| 815 | u32 pool, u64 *snapid) | ||
| 816 | { | ||
| 817 | return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
| 818 | pool, 0, (char *)snapid, sizeof(*snapid)); | ||
| 819 | |||
| 820 | } | ||
| 821 | EXPORT_SYMBOL(ceph_monc_create_snapid); | ||
| 822 | |||
| 823 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
| 824 | u32 pool, u64 snapid) | ||
| 825 | { | ||
| 826 | return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
| 827 | pool, snapid, NULL, 0); | ||
| 828 | |||
| 829 | } | ||
| 830 | |||
| 831 | /* | ||
| 832 | * Resend pending generic requests. | 708 | * Resend pending generic requests. |
| 833 | */ | 709 | */ |
| 834 | static void __resend_generic_request(struct ceph_mon_client *monc) | 710 | static void __resend_generic_request(struct ceph_mon_client *monc) |
| @@ -1112,10 +988,6 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 1112 | handle_get_version_reply(monc, msg); | 988 | handle_get_version_reply(monc, msg); |
| 1113 | break; | 989 | break; |
| 1114 | 990 | ||
| 1115 | case CEPH_MSG_POOLOP_REPLY: | ||
| 1116 | handle_poolop_reply(monc, msg); | ||
| 1117 | break; | ||
| 1118 | |||
| 1119 | case CEPH_MSG_MON_MAP: | 991 | case CEPH_MSG_MON_MAP: |
| 1120 | ceph_monc_handle_map(monc, msg); | 992 | ceph_monc_handle_map(monc, msg); |
| 1121 | break; | 993 | break; |
| @@ -1154,7 +1026,6 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
| 1154 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 1026 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
| 1155 | m = ceph_msg_get(monc->m_subscribe_ack); | 1027 | m = ceph_msg_get(monc->m_subscribe_ack); |
| 1156 | break; | 1028 | break; |
| 1157 | case CEPH_MSG_POOLOP_REPLY: | ||
| 1158 | case CEPH_MSG_STATFS_REPLY: | 1029 | case CEPH_MSG_STATFS_REPLY: |
| 1159 | return get_generic_reply(con, hdr, skip); | 1030 | return get_generic_reply(con, hdr, skip); |
| 1160 | case CEPH_MSG_AUTH_REPLY: | 1031 | case CEPH_MSG_AUTH_REPLY: |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 53299c7b0ca4..41a4abc7e98e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -1035,10 +1035,11 @@ static void put_osd(struct ceph_osd *osd) | |||
| 1035 | { | 1035 | { |
| 1036 | dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), | 1036 | dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), |
| 1037 | atomic_read(&osd->o_ref) - 1); | 1037 | atomic_read(&osd->o_ref) - 1); |
| 1038 | if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { | 1038 | if (atomic_dec_and_test(&osd->o_ref)) { |
| 1039 | struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; | 1039 | struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; |
| 1040 | 1040 | ||
| 1041 | ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); | 1041 | if (osd->o_auth.authorizer) |
| 1042 | ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); | ||
| 1042 | kfree(osd); | 1043 | kfree(osd); |
| 1043 | } | 1044 | } |
| 1044 | } | 1045 | } |
| @@ -1048,14 +1049,24 @@ static void put_osd(struct ceph_osd *osd) | |||
| 1048 | */ | 1049 | */ |
| 1049 | static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 1050 | static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
| 1050 | { | 1051 | { |
| 1051 | dout("__remove_osd %p\n", osd); | 1052 | dout("%s %p osd%d\n", __func__, osd, osd->o_osd); |
| 1052 | WARN_ON(!list_empty(&osd->o_requests)); | 1053 | WARN_ON(!list_empty(&osd->o_requests)); |
| 1053 | WARN_ON(!list_empty(&osd->o_linger_requests)); | 1054 | WARN_ON(!list_empty(&osd->o_linger_requests)); |
| 1054 | 1055 | ||
| 1055 | rb_erase(&osd->o_node, &osdc->osds); | ||
| 1056 | list_del_init(&osd->o_osd_lru); | 1056 | list_del_init(&osd->o_osd_lru); |
| 1057 | ceph_con_close(&osd->o_con); | 1057 | rb_erase(&osd->o_node, &osdc->osds); |
| 1058 | put_osd(osd); | 1058 | RB_CLEAR_NODE(&osd->o_node); |
| 1059 | } | ||
| 1060 | |||
| 1061 | static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | ||
| 1062 | { | ||
| 1063 | dout("%s %p osd%d\n", __func__, osd, osd->o_osd); | ||
| 1064 | |||
| 1065 | if (!RB_EMPTY_NODE(&osd->o_node)) { | ||
| 1066 | ceph_con_close(&osd->o_con); | ||
| 1067 | __remove_osd(osdc, osd); | ||
| 1068 | put_osd(osd); | ||
| 1069 | } | ||
| 1059 | } | 1070 | } |
| 1060 | 1071 | ||
| 1061 | static void remove_all_osds(struct ceph_osd_client *osdc) | 1072 | static void remove_all_osds(struct ceph_osd_client *osdc) |
| @@ -1065,7 +1076,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc) | |||
| 1065 | while (!RB_EMPTY_ROOT(&osdc->osds)) { | 1076 | while (!RB_EMPTY_ROOT(&osdc->osds)) { |
| 1066 | struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), | 1077 | struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), |
| 1067 | struct ceph_osd, o_node); | 1078 | struct ceph_osd, o_node); |
| 1068 | __remove_osd(osdc, osd); | 1079 | remove_osd(osdc, osd); |
| 1069 | } | 1080 | } |
| 1070 | mutex_unlock(&osdc->request_mutex); | 1081 | mutex_unlock(&osdc->request_mutex); |
| 1071 | } | 1082 | } |
| @@ -1106,7 +1117,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc) | |||
| 1106 | list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { | 1117 | list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { |
| 1107 | if (time_before(jiffies, osd->lru_ttl)) | 1118 | if (time_before(jiffies, osd->lru_ttl)) |
| 1108 | break; | 1119 | break; |
| 1109 | __remove_osd(osdc, osd); | 1120 | remove_osd(osdc, osd); |
| 1110 | } | 1121 | } |
| 1111 | mutex_unlock(&osdc->request_mutex); | 1122 | mutex_unlock(&osdc->request_mutex); |
| 1112 | } | 1123 | } |
| @@ -1121,8 +1132,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | |||
| 1121 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); | 1132 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); |
| 1122 | if (list_empty(&osd->o_requests) && | 1133 | if (list_empty(&osd->o_requests) && |
| 1123 | list_empty(&osd->o_linger_requests)) { | 1134 | list_empty(&osd->o_linger_requests)) { |
| 1124 | __remove_osd(osdc, osd); | 1135 | remove_osd(osdc, osd); |
| 1125 | |||
| 1126 | return -ENODEV; | 1136 | return -ENODEV; |
| 1127 | } | 1137 | } |
| 1128 | 1138 | ||
| @@ -1926,6 +1936,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) | |||
| 1926 | { | 1936 | { |
| 1927 | struct rb_node *p, *n; | 1937 | struct rb_node *p, *n; |
| 1928 | 1938 | ||
| 1939 | dout("%s %p\n", __func__, osdc); | ||
| 1929 | for (p = rb_first(&osdc->osds); p; p = n) { | 1940 | for (p = rb_first(&osdc->osds); p; p = n) { |
| 1930 | struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); | 1941 | struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); |
| 1931 | 1942 | ||
