diff options
| -rw-r--r-- | drivers/block/rbd.c | 65 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 9 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 9 | ||||
| -rw-r--r-- | fs/ceph/locks.c | 177 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 96 | ||||
| -rw-r--r-- | fs/ceph/super.c | 5 | ||||
| -rw-r--r-- | fs/ceph/super.h | 4 | ||||
| -rw-r--r-- | net/ceph/ceph_hash.c | 12 | ||||
| -rw-r--r-- | net/ceph/crypto.c | 4 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 1 | ||||
| -rw-r--r-- | net/ceph/mon_client.c | 5 |
11 files changed, 237 insertions, 150 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index adc877dfef5c..38fc5f397fde 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
| @@ -348,7 +348,6 @@ struct rbd_client_id { | |||
| 348 | struct rbd_mapping { | 348 | struct rbd_mapping { |
| 349 | u64 size; | 349 | u64 size; |
| 350 | u64 features; | 350 | u64 features; |
| 351 | bool read_only; | ||
| 352 | }; | 351 | }; |
| 353 | 352 | ||
| 354 | /* | 353 | /* |
| @@ -450,12 +449,11 @@ static DEFINE_IDA(rbd_dev_id_ida); | |||
| 450 | static struct workqueue_struct *rbd_wq; | 449 | static struct workqueue_struct *rbd_wq; |
| 451 | 450 | ||
| 452 | /* | 451 | /* |
| 453 | * Default to false for now, as single-major requires >= 0.75 version of | 452 | * single-major requires >= 0.75 version of userspace rbd utility. |
| 454 | * userspace rbd utility. | ||
| 455 | */ | 453 | */ |
| 456 | static bool single_major = false; | 454 | static bool single_major = true; |
| 457 | module_param(single_major, bool, S_IRUGO); | 455 | module_param(single_major, bool, S_IRUGO); |
| 458 | MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: false)"); | 456 | MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)"); |
| 459 | 457 | ||
| 460 | static int rbd_img_request_submit(struct rbd_img_request *img_request); | 458 | static int rbd_img_request_submit(struct rbd_img_request *img_request); |
| 461 | 459 | ||
| @@ -608,9 +606,6 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) | |||
| 608 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; | 606 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; |
| 609 | bool removing = false; | 607 | bool removing = false; |
| 610 | 608 | ||
| 611 | if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) | ||
| 612 | return -EROFS; | ||
| 613 | |||
| 614 | spin_lock_irq(&rbd_dev->lock); | 609 | spin_lock_irq(&rbd_dev->lock); |
| 615 | if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) | 610 | if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) |
| 616 | removing = true; | 611 | removing = true; |
| @@ -640,46 +635,24 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) | |||
| 640 | 635 | ||
| 641 | static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) | 636 | static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) |
| 642 | { | 637 | { |
| 643 | int ret = 0; | 638 | int ro; |
| 644 | int val; | ||
| 645 | bool ro; | ||
| 646 | bool ro_changed = false; | ||
| 647 | 639 | ||
| 648 | /* get_user() may sleep, so call it before taking rbd_dev->lock */ | 640 | if (get_user(ro, (int __user *)arg)) |
| 649 | if (get_user(val, (int __user *)(arg))) | ||
| 650 | return -EFAULT; | 641 | return -EFAULT; |
| 651 | 642 | ||
| 652 | ro = val ? true : false; | 643 | /* Snapshots can't be marked read-write */ |
| 653 | /* Snapshot doesn't allow to write*/ | ||
| 654 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro) | 644 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro) |
| 655 | return -EROFS; | 645 | return -EROFS; |
| 656 | 646 | ||
| 657 | spin_lock_irq(&rbd_dev->lock); | 647 | /* Let blkdev_roset() handle it */ |
| 658 | /* prevent others open this device */ | 648 | return -ENOTTY; |
| 659 | if (rbd_dev->open_count > 1) { | ||
| 660 | ret = -EBUSY; | ||
| 661 | goto out; | ||
| 662 | } | ||
| 663 | |||
| 664 | if (rbd_dev->mapping.read_only != ro) { | ||
| 665 | rbd_dev->mapping.read_only = ro; | ||
| 666 | ro_changed = true; | ||
| 667 | } | ||
| 668 | |||
| 669 | out: | ||
| 670 | spin_unlock_irq(&rbd_dev->lock); | ||
| 671 | /* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */ | ||
| 672 | if (ret == 0 && ro_changed) | ||
| 673 | set_disk_ro(rbd_dev->disk, ro ? 1 : 0); | ||
| 674 | |||
| 675 | return ret; | ||
| 676 | } | 649 | } |
| 677 | 650 | ||
| 678 | static int rbd_ioctl(struct block_device *bdev, fmode_t mode, | 651 | static int rbd_ioctl(struct block_device *bdev, fmode_t mode, |
| 679 | unsigned int cmd, unsigned long arg) | 652 | unsigned int cmd, unsigned long arg) |
| 680 | { | 653 | { |
| 681 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; | 654 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; |
| 682 | int ret = 0; | 655 | int ret; |
| 683 | 656 | ||
| 684 | switch (cmd) { | 657 | switch (cmd) { |
| 685 | case BLKROSET: | 658 | case BLKROSET: |
| @@ -4050,15 +4023,8 @@ static void rbd_queue_workfn(struct work_struct *work) | |||
| 4050 | goto err_rq; | 4023 | goto err_rq; |
| 4051 | } | 4024 | } |
| 4052 | 4025 | ||
| 4053 | /* Only reads are allowed to a read-only device */ | 4026 | rbd_assert(op_type == OBJ_OP_READ || |
| 4054 | 4027 | rbd_dev->spec->snap_id == CEPH_NOSNAP); | |
| 4055 | if (op_type != OBJ_OP_READ) { | ||
| 4056 | if (rbd_dev->mapping.read_only) { | ||
| 4057 | result = -EROFS; | ||
| 4058 | goto err_rq; | ||
| 4059 | } | ||
| 4060 | rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); | ||
| 4061 | } | ||
| 4062 | 4028 | ||
| 4063 | /* | 4029 | /* |
| 4064 | * Quit early if the mapped snapshot no longer exists. It's | 4030 | * Quit early if the mapped snapshot no longer exists. It's |
| @@ -4423,7 +4389,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
| 4423 | /* enable the discard support */ | 4389 | /* enable the discard support */ |
| 4424 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | 4390 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); |
| 4425 | q->limits.discard_granularity = segment_size; | 4391 | q->limits.discard_granularity = segment_size; |
| 4426 | q->limits.discard_alignment = segment_size; | ||
| 4427 | blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); | 4392 | blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); |
| 4428 | blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); | 4393 | blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); |
| 4429 | 4394 | ||
| @@ -5994,7 +5959,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
| 5994 | goto err_out_disk; | 5959 | goto err_out_disk; |
| 5995 | 5960 | ||
| 5996 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); | 5961 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); |
| 5997 | set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); | 5962 | set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only); |
| 5998 | 5963 | ||
| 5999 | ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); | 5964 | ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); |
| 6000 | if (ret) | 5965 | if (ret) |
| @@ -6145,7 +6110,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, | |||
| 6145 | struct rbd_options *rbd_opts = NULL; | 6110 | struct rbd_options *rbd_opts = NULL; |
| 6146 | struct rbd_spec *spec = NULL; | 6111 | struct rbd_spec *spec = NULL; |
| 6147 | struct rbd_client *rbdc; | 6112 | struct rbd_client *rbdc; |
| 6148 | bool read_only; | ||
| 6149 | int rc; | 6113 | int rc; |
| 6150 | 6114 | ||
| 6151 | if (!try_module_get(THIS_MODULE)) | 6115 | if (!try_module_get(THIS_MODULE)) |
| @@ -6194,11 +6158,8 @@ static ssize_t do_rbd_add(struct bus_type *bus, | |||
| 6194 | } | 6158 | } |
| 6195 | 6159 | ||
| 6196 | /* If we are mapping a snapshot it must be marked read-only */ | 6160 | /* If we are mapping a snapshot it must be marked read-only */ |
| 6197 | |||
| 6198 | read_only = rbd_dev->opts->read_only; | ||
| 6199 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP) | 6161 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP) |
| 6200 | read_only = true; | 6162 | rbd_dev->opts->read_only = true; |
| 6201 | rbd_dev->mapping.read_only = read_only; | ||
| 6202 | 6163 | ||
| 6203 | rc = rbd_dev_device_setup(rbd_dev); | 6164 | rc = rbd_dev_device_setup(rbd_dev); |
| 6204 | if (rc) | 6165 | if (rc) |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ff5d32cf9578..a14b2c974c9e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -1160,7 +1160,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1160 | struct ceph_inode_info *ci = cap->ci; | 1160 | struct ceph_inode_info *ci = cap->ci; |
| 1161 | struct inode *inode = &ci->vfs_inode; | 1161 | struct inode *inode = &ci->vfs_inode; |
| 1162 | struct cap_msg_args arg; | 1162 | struct cap_msg_args arg; |
| 1163 | int held, revoking, dropping; | 1163 | int held, revoking; |
| 1164 | int wake = 0; | 1164 | int wake = 0; |
| 1165 | int delayed = 0; | 1165 | int delayed = 0; |
| 1166 | int ret; | 1166 | int ret; |
| @@ -1168,7 +1168,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1168 | held = cap->issued | cap->implemented; | 1168 | held = cap->issued | cap->implemented; |
| 1169 | revoking = cap->implemented & ~cap->issued; | 1169 | revoking = cap->implemented & ~cap->issued; |
| 1170 | retain &= ~revoking; | 1170 | retain &= ~revoking; |
| 1171 | dropping = cap->issued & ~retain; | ||
| 1172 | 1171 | ||
| 1173 | dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n", | 1172 | dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n", |
| 1174 | inode, cap, cap->session, | 1173 | inode, cap, cap->session, |
| @@ -1712,7 +1711,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
| 1712 | 1711 | ||
| 1713 | /* if we are unmounting, flush any unused caps immediately. */ | 1712 | /* if we are unmounting, flush any unused caps immediately. */ |
| 1714 | if (mdsc->stopping) | 1713 | if (mdsc->stopping) |
| 1715 | is_delayed = 1; | 1714 | is_delayed = true; |
| 1716 | 1715 | ||
| 1717 | spin_lock(&ci->i_ceph_lock); | 1716 | spin_lock(&ci->i_ceph_lock); |
| 1718 | 1717 | ||
| @@ -3189,8 +3188,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
| 3189 | int dirty = le32_to_cpu(m->dirty); | 3188 | int dirty = le32_to_cpu(m->dirty); |
| 3190 | int cleaned = 0; | 3189 | int cleaned = 0; |
| 3191 | bool drop = false; | 3190 | bool drop = false; |
| 3192 | bool wake_ci = 0; | 3191 | bool wake_ci = false; |
| 3193 | bool wake_mdsc = 0; | 3192 | bool wake_mdsc = false; |
| 3194 | 3193 | ||
| 3195 | list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { | 3194 | list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { |
| 3196 | if (cf->tid == flush_tid) | 3195 | if (cf->tid == flush_tid) |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f2550a076edc..ab81652198c4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -493,6 +493,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
| 493 | ci->i_wb_ref = 0; | 493 | ci->i_wb_ref = 0; |
| 494 | ci->i_wrbuffer_ref = 0; | 494 | ci->i_wrbuffer_ref = 0; |
| 495 | ci->i_wrbuffer_ref_head = 0; | 495 | ci->i_wrbuffer_ref_head = 0; |
| 496 | atomic_set(&ci->i_filelock_ref, 0); | ||
| 496 | ci->i_shared_gen = 0; | 497 | ci->i_shared_gen = 0; |
| 497 | ci->i_rdcache_gen = 0; | 498 | ci->i_rdcache_gen = 0; |
| 498 | ci->i_rdcache_revoking = 0; | 499 | ci->i_rdcache_revoking = 0; |
| @@ -786,7 +787,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
| 786 | 787 | ||
| 787 | /* update inode */ | 788 | /* update inode */ |
| 788 | ci->i_version = le64_to_cpu(info->version); | 789 | ci->i_version = le64_to_cpu(info->version); |
| 789 | inode->i_version++; | ||
| 790 | inode->i_rdev = le32_to_cpu(info->rdev); | 790 | inode->i_rdev = le32_to_cpu(info->rdev); |
| 791 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 791 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
| 792 | 792 | ||
| @@ -1185,6 +1185,7 @@ retry_lookup: | |||
| 1185 | ceph_snap(d_inode(dn)) != tvino.snap)) { | 1185 | ceph_snap(d_inode(dn)) != tvino.snap)) { |
| 1186 | dout(" dn %p points to wrong inode %p\n", | 1186 | dout(" dn %p points to wrong inode %p\n", |
| 1187 | dn, d_inode(dn)); | 1187 | dn, d_inode(dn)); |
| 1188 | ceph_dir_clear_ordered(dir); | ||
| 1188 | d_delete(dn); | 1189 | d_delete(dn); |
| 1189 | dput(dn); | 1190 | dput(dn); |
| 1190 | goto retry_lookup; | 1191 | goto retry_lookup; |
| @@ -1322,6 +1323,7 @@ retry_lookup: | |||
| 1322 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", | 1323 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", |
| 1323 | dn, d_inode(dn), ceph_vinop(d_inode(dn)), | 1324 | dn, d_inode(dn), ceph_vinop(d_inode(dn)), |
| 1324 | ceph_vinop(in)); | 1325 | ceph_vinop(in)); |
| 1326 | ceph_dir_clear_ordered(dir); | ||
| 1325 | d_invalidate(dn); | 1327 | d_invalidate(dn); |
| 1326 | have_lease = false; | 1328 | have_lease = false; |
| 1327 | } | 1329 | } |
| @@ -1573,6 +1575,7 @@ retry_lookup: | |||
| 1573 | ceph_snap(d_inode(dn)) != tvino.snap)) { | 1575 | ceph_snap(d_inode(dn)) != tvino.snap)) { |
| 1574 | dout(" dn %p points to wrong inode %p\n", | 1576 | dout(" dn %p points to wrong inode %p\n", |
| 1575 | dn, d_inode(dn)); | 1577 | dn, d_inode(dn)); |
| 1578 | __ceph_dir_clear_ordered(ci); | ||
| 1576 | d_delete(dn); | 1579 | d_delete(dn); |
| 1577 | dput(dn); | 1580 | dput(dn); |
| 1578 | goto retry_lookup; | 1581 | goto retry_lookup; |
| @@ -1597,7 +1600,9 @@ retry_lookup: | |||
| 1597 | &req->r_caps_reservation); | 1600 | &req->r_caps_reservation); |
| 1598 | if (ret < 0) { | 1601 | if (ret < 0) { |
| 1599 | pr_err("fill_inode badness on %p\n", in); | 1602 | pr_err("fill_inode badness on %p\n", in); |
| 1600 | if (d_really_is_negative(dn)) | 1603 | if (d_really_is_positive(dn)) |
| 1604 | __ceph_dir_clear_ordered(ci); | ||
| 1605 | else | ||
| 1601 | iput(in); | 1606 | iput(in); |
| 1602 | d_drop(dn); | 1607 | d_drop(dn); |
| 1603 | err = ret; | 1608 | err = ret; |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index e7cce412f2cf..9e66f69ee8a5 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
| @@ -30,19 +30,52 @@ void __init ceph_flock_init(void) | |||
| 30 | get_random_bytes(&lock_secret, sizeof(lock_secret)); | 30 | get_random_bytes(&lock_secret, sizeof(lock_secret)); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) | ||
| 34 | { | ||
| 35 | struct inode *inode = file_inode(src->fl_file); | ||
| 36 | atomic_inc(&ceph_inode(inode)->i_filelock_ref); | ||
| 37 | } | ||
| 38 | |||
| 39 | static void ceph_fl_release_lock(struct file_lock *fl) | ||
| 40 | { | ||
| 41 | struct inode *inode = file_inode(fl->fl_file); | ||
| 42 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 43 | if (atomic_dec_and_test(&ci->i_filelock_ref)) { | ||
| 44 | /* clear error when all locks are released */ | ||
| 45 | spin_lock(&ci->i_ceph_lock); | ||
| 46 | ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; | ||
| 47 | spin_unlock(&ci->i_ceph_lock); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | static const struct file_lock_operations ceph_fl_lock_ops = { | ||
| 52 | .fl_copy_lock = ceph_fl_copy_lock, | ||
| 53 | .fl_release_private = ceph_fl_release_lock, | ||
| 54 | }; | ||
| 55 | |||
| 33 | /** | 56 | /** |
| 34 | * Implement fcntl and flock locking functions. | 57 | * Implement fcntl and flock locking functions. |
| 35 | */ | 58 | */ |
| 36 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | 59 | static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, |
| 37 | int cmd, u8 wait, struct file_lock *fl) | 60 | int cmd, u8 wait, struct file_lock *fl) |
| 38 | { | 61 | { |
| 39 | struct inode *inode = file_inode(file); | ||
| 40 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 62 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 41 | struct ceph_mds_request *req; | 63 | struct ceph_mds_request *req; |
| 42 | int err; | 64 | int err; |
| 43 | u64 length = 0; | 65 | u64 length = 0; |
| 44 | u64 owner; | 66 | u64 owner; |
| 45 | 67 | ||
| 68 | if (operation == CEPH_MDS_OP_SETFILELOCK) { | ||
| 69 | /* | ||
| 70 | * increasing i_filelock_ref closes race window between | ||
| 71 | * handling request reply and adding file_lock struct to | ||
| 72 | * inode. Otherwise, auth caps may get trimmed in the | ||
| 73 | * window. Caller function will decrease the counter. | ||
| 74 | */ | ||
| 75 | fl->fl_ops = &ceph_fl_lock_ops; | ||
| 76 | atomic_inc(&ceph_inode(inode)->i_filelock_ref); | ||
| 77 | } | ||
| 78 | |||
| 46 | if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) | 79 | if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) |
| 47 | wait = 0; | 80 | wait = 0; |
| 48 | 81 | ||
| @@ -180,10 +213,12 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, | |||
| 180 | */ | 213 | */ |
| 181 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | 214 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) |
| 182 | { | 215 | { |
| 183 | u8 lock_cmd; | 216 | struct inode *inode = file_inode(file); |
| 184 | int err; | 217 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 185 | u8 wait = 0; | 218 | int err = 0; |
| 186 | u16 op = CEPH_MDS_OP_SETFILELOCK; | 219 | u16 op = CEPH_MDS_OP_SETFILELOCK; |
| 220 | u8 wait = 0; | ||
| 221 | u8 lock_cmd; | ||
| 187 | 222 | ||
| 188 | if (!(fl->fl_flags & FL_POSIX)) | 223 | if (!(fl->fl_flags & FL_POSIX)) |
| 189 | return -ENOLCK; | 224 | return -ENOLCK; |
| @@ -199,6 +234,26 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
| 199 | else if (IS_SETLKW(cmd)) | 234 | else if (IS_SETLKW(cmd)) |
| 200 | wait = 1; | 235 | wait = 1; |
| 201 | 236 | ||
| 237 | spin_lock(&ci->i_ceph_lock); | ||
| 238 | if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { | ||
| 239 | err = -EIO; | ||
| 240 | } else if (op == CEPH_MDS_OP_SETFILELOCK) { | ||
| 241 | /* | ||
| 242 | * increasing i_filelock_ref closes race window between | ||
| 243 | * handling request reply and adding file_lock struct to | ||
| 244 | * inode. Otherwise, i_auth_cap may get trimmed in the | ||
| 245 | * window. Caller function will decrease the counter. | ||
| 246 | */ | ||
| 247 | fl->fl_ops = &ceph_fl_lock_ops; | ||
| 248 | atomic_inc(&ci->i_filelock_ref); | ||
| 249 | } | ||
| 250 | spin_unlock(&ci->i_ceph_lock); | ||
| 251 | if (err < 0) { | ||
| 252 | if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) | ||
| 253 | posix_lock_file(file, fl, NULL); | ||
| 254 | return err; | ||
| 255 | } | ||
| 256 | |||
| 202 | if (F_RDLCK == fl->fl_type) | 257 | if (F_RDLCK == fl->fl_type) |
| 203 | lock_cmd = CEPH_LOCK_SHARED; | 258 | lock_cmd = CEPH_LOCK_SHARED; |
| 204 | else if (F_WRLCK == fl->fl_type) | 259 | else if (F_WRLCK == fl->fl_type) |
| @@ -206,16 +261,16 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
| 206 | else | 261 | else |
| 207 | lock_cmd = CEPH_LOCK_UNLOCK; | 262 | lock_cmd = CEPH_LOCK_UNLOCK; |
| 208 | 263 | ||
| 209 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); | 264 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); |
| 210 | if (!err) { | 265 | if (!err) { |
| 211 | if (op != CEPH_MDS_OP_GETFILELOCK) { | 266 | if (op == CEPH_MDS_OP_SETFILELOCK) { |
| 212 | dout("mds locked, locking locally"); | 267 | dout("mds locked, locking locally"); |
| 213 | err = posix_lock_file(file, fl, NULL); | 268 | err = posix_lock_file(file, fl, NULL); |
| 214 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 269 | if (err) { |
| 215 | /* undo! This should only happen if | 270 | /* undo! This should only happen if |
| 216 | * the kernel detects local | 271 | * the kernel detects local |
| 217 | * deadlock. */ | 272 | * deadlock. */ |
| 218 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 273 | ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, |
| 219 | CEPH_LOCK_UNLOCK, 0, fl); | 274 | CEPH_LOCK_UNLOCK, 0, fl); |
| 220 | dout("got %d on posix_lock_file, undid lock", | 275 | dout("got %d on posix_lock_file, undid lock", |
| 221 | err); | 276 | err); |
| @@ -227,9 +282,11 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
| 227 | 282 | ||
| 228 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | 283 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) |
| 229 | { | 284 | { |
| 230 | u8 lock_cmd; | 285 | struct inode *inode = file_inode(file); |
| 231 | int err; | 286 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 287 | int err = 0; | ||
| 232 | u8 wait = 0; | 288 | u8 wait = 0; |
| 289 | u8 lock_cmd; | ||
| 233 | 290 | ||
| 234 | if (!(fl->fl_flags & FL_FLOCK)) | 291 | if (!(fl->fl_flags & FL_FLOCK)) |
| 235 | return -ENOLCK; | 292 | return -ENOLCK; |
| @@ -239,6 +296,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 239 | 296 | ||
| 240 | dout("ceph_flock, fl_file: %p", fl->fl_file); | 297 | dout("ceph_flock, fl_file: %p", fl->fl_file); |
| 241 | 298 | ||
| 299 | spin_lock(&ci->i_ceph_lock); | ||
| 300 | if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { | ||
| 301 | err = -EIO; | ||
| 302 | } else { | ||
| 303 | /* see comment in ceph_lock */ | ||
| 304 | fl->fl_ops = &ceph_fl_lock_ops; | ||
| 305 | atomic_inc(&ci->i_filelock_ref); | ||
| 306 | } | ||
| 307 | spin_unlock(&ci->i_ceph_lock); | ||
| 308 | if (err < 0) { | ||
| 309 | if (F_UNLCK == fl->fl_type) | ||
| 310 | locks_lock_file_wait(file, fl); | ||
| 311 | return err; | ||
| 312 | } | ||
| 313 | |||
| 242 | if (IS_SETLKW(cmd)) | 314 | if (IS_SETLKW(cmd)) |
| 243 | wait = 1; | 315 | wait = 1; |
| 244 | 316 | ||
| @@ -250,13 +322,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 250 | lock_cmd = CEPH_LOCK_UNLOCK; | 322 | lock_cmd = CEPH_LOCK_UNLOCK; |
| 251 | 323 | ||
| 252 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 324 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
| 253 | file, lock_cmd, wait, fl); | 325 | inode, lock_cmd, wait, fl); |
| 254 | if (!err) { | 326 | if (!err) { |
| 255 | err = locks_lock_file_wait(file, fl); | 327 | err = locks_lock_file_wait(file, fl); |
| 256 | if (err) { | 328 | if (err) { |
| 257 | ceph_lock_message(CEPH_LOCK_FLOCK, | 329 | ceph_lock_message(CEPH_LOCK_FLOCK, |
| 258 | CEPH_MDS_OP_SETFILELOCK, | 330 | CEPH_MDS_OP_SETFILELOCK, |
| 259 | file, CEPH_LOCK_UNLOCK, 0, fl); | 331 | inode, CEPH_LOCK_UNLOCK, 0, fl); |
| 260 | dout("got %d on locks_lock_file_wait, undid lock", err); | 332 | dout("got %d on locks_lock_file_wait, undid lock", err); |
| 261 | } | 333 | } |
| 262 | } | 334 | } |
| @@ -288,6 +360,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
| 288 | *flock_count, *fcntl_count); | 360 | *flock_count, *fcntl_count); |
| 289 | } | 361 | } |
| 290 | 362 | ||
| 363 | /* | ||
| 364 | * Given a pointer to a lock, convert it to a ceph filelock | ||
| 365 | */ | ||
| 366 | static int lock_to_ceph_filelock(struct file_lock *lock, | ||
| 367 | struct ceph_filelock *cephlock) | ||
| 368 | { | ||
| 369 | int err = 0; | ||
| 370 | cephlock->start = cpu_to_le64(lock->fl_start); | ||
| 371 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | ||
| 372 | cephlock->client = cpu_to_le64(0); | ||
| 373 | cephlock->pid = cpu_to_le64((u64)lock->fl_pid); | ||
| 374 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); | ||
| 375 | |||
| 376 | switch (lock->fl_type) { | ||
| 377 | case F_RDLCK: | ||
| 378 | cephlock->type = CEPH_LOCK_SHARED; | ||
| 379 | break; | ||
| 380 | case F_WRLCK: | ||
| 381 | cephlock->type = CEPH_LOCK_EXCL; | ||
| 382 | break; | ||
| 383 | case F_UNLCK: | ||
| 384 | cephlock->type = CEPH_LOCK_UNLOCK; | ||
| 385 | break; | ||
| 386 | default: | ||
| 387 | dout("Have unknown lock type %d", lock->fl_type); | ||
| 388 | err = -EINVAL; | ||
| 389 | } | ||
| 390 | |||
| 391 | return err; | ||
| 392 | } | ||
| 393 | |||
| 291 | /** | 394 | /** |
| 292 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock | 395 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock |
| 293 | * array. Must be called with inode->i_lock already held. | 396 | * array. Must be called with inode->i_lock already held. |
| @@ -356,50 +459,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks, | |||
| 356 | if (err) | 459 | if (err) |
| 357 | goto out_fail; | 460 | goto out_fail; |
| 358 | 461 | ||
| 359 | err = ceph_pagelist_append(pagelist, flocks, | 462 | if (num_fcntl_locks > 0) { |
| 360 | num_fcntl_locks * sizeof(*flocks)); | 463 | err = ceph_pagelist_append(pagelist, flocks, |
| 361 | if (err) | 464 | num_fcntl_locks * sizeof(*flocks)); |
| 362 | goto out_fail; | 465 | if (err) |
| 466 | goto out_fail; | ||
| 467 | } | ||
| 363 | 468 | ||
| 364 | nlocks = cpu_to_le32(num_flock_locks); | 469 | nlocks = cpu_to_le32(num_flock_locks); |
| 365 | err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); | 470 | err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); |
| 366 | if (err) | 471 | if (err) |
| 367 | goto out_fail; | 472 | goto out_fail; |
| 368 | 473 | ||
| 369 | err = ceph_pagelist_append(pagelist, | 474 | if (num_flock_locks > 0) { |
| 370 | &flocks[num_fcntl_locks], | 475 | err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks], |
| 371 | num_flock_locks * sizeof(*flocks)); | 476 | num_flock_locks * sizeof(*flocks)); |
| 372 | out_fail: | ||
| 373 | return err; | ||
| 374 | } | ||
| 375 | |||
| 376 | /* | ||
| 377 | * Given a pointer to a lock, convert it to a ceph filelock | ||
| 378 | */ | ||
| 379 | int lock_to_ceph_filelock(struct file_lock *lock, | ||
| 380 | struct ceph_filelock *cephlock) | ||
| 381 | { | ||
| 382 | int err = 0; | ||
| 383 | cephlock->start = cpu_to_le64(lock->fl_start); | ||
| 384 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | ||
| 385 | cephlock->client = cpu_to_le64(0); | ||
| 386 | cephlock->pid = cpu_to_le64((u64)lock->fl_pid); | ||
| 387 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); | ||
| 388 | |||
| 389 | switch (lock->fl_type) { | ||
| 390 | case F_RDLCK: | ||
| 391 | cephlock->type = CEPH_LOCK_SHARED; | ||
| 392 | break; | ||
| 393 | case F_WRLCK: | ||
| 394 | cephlock->type = CEPH_LOCK_EXCL; | ||
| 395 | break; | ||
| 396 | case F_UNLCK: | ||
| 397 | cephlock->type = CEPH_LOCK_UNLOCK; | ||
| 398 | break; | ||
| 399 | default: | ||
| 400 | dout("Have unknown lock type %d", lock->fl_type); | ||
| 401 | err = -EINVAL; | ||
| 402 | } | 477 | } |
| 403 | 478 | out_fail: | |
| 404 | return err; | 479 | return err; |
| 405 | } | 480 | } |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0687ab3c3267..ab69dcb70e8a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -1039,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | |||
| 1039 | * session caps | 1039 | * session caps |
| 1040 | */ | 1040 | */ |
| 1041 | 1041 | ||
| 1042 | /* caller holds s_cap_lock, we drop it */ | 1042 | static void detach_cap_releases(struct ceph_mds_session *session, |
| 1043 | static void cleanup_cap_releases(struct ceph_mds_client *mdsc, | 1043 | struct list_head *target) |
| 1044 | struct ceph_mds_session *session) | ||
| 1045 | __releases(session->s_cap_lock) | ||
| 1046 | { | 1044 | { |
| 1047 | LIST_HEAD(tmp_list); | 1045 | lockdep_assert_held(&session->s_cap_lock); |
| 1048 | list_splice_init(&session->s_cap_releases, &tmp_list); | 1046 | |
| 1047 | list_splice_init(&session->s_cap_releases, target); | ||
| 1049 | session->s_num_cap_releases = 0; | 1048 | session->s_num_cap_releases = 0; |
| 1050 | spin_unlock(&session->s_cap_lock); | 1049 | dout("dispose_cap_releases mds%d\n", session->s_mds); |
| 1050 | } | ||
| 1051 | 1051 | ||
| 1052 | dout("cleanup_cap_releases mds%d\n", session->s_mds); | 1052 | static void dispose_cap_releases(struct ceph_mds_client *mdsc, |
| 1053 | while (!list_empty(&tmp_list)) { | 1053 | struct list_head *dispose) |
| 1054 | { | ||
| 1055 | while (!list_empty(dispose)) { | ||
| 1054 | struct ceph_cap *cap; | 1056 | struct ceph_cap *cap; |
| 1055 | /* zero out the in-progress message */ | 1057 | /* zero out the in-progress message */ |
| 1056 | cap = list_first_entry(&tmp_list, | 1058 | cap = list_first_entry(dispose, struct ceph_cap, session_caps); |
| 1057 | struct ceph_cap, session_caps); | ||
| 1058 | list_del(&cap->session_caps); | 1059 | list_del(&cap->session_caps); |
| 1059 | ceph_put_cap(mdsc, cap); | 1060 | ceph_put_cap(mdsc, cap); |
| 1060 | } | 1061 | } |
| @@ -1215,6 +1216,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 1215 | } | 1216 | } |
| 1216 | spin_unlock(&mdsc->cap_dirty_lock); | 1217 | spin_unlock(&mdsc->cap_dirty_lock); |
| 1217 | 1218 | ||
| 1219 | if (atomic_read(&ci->i_filelock_ref) > 0) { | ||
| 1220 | /* make further file lock syscall return -EIO */ | ||
| 1221 | ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; | ||
| 1222 | pr_warn_ratelimited(" dropping file locks for %p %lld\n", | ||
| 1223 | inode, ceph_ino(inode)); | ||
| 1224 | } | ||
| 1225 | |||
| 1218 | if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { | 1226 | if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { |
| 1219 | list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); | 1227 | list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); |
| 1220 | ci->i_prealloc_cap_flush = NULL; | 1228 | ci->i_prealloc_cap_flush = NULL; |
| @@ -1244,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
| 1244 | { | 1252 | { |
| 1245 | struct ceph_fs_client *fsc = session->s_mdsc->fsc; | 1253 | struct ceph_fs_client *fsc = session->s_mdsc->fsc; |
| 1246 | struct super_block *sb = fsc->sb; | 1254 | struct super_block *sb = fsc->sb; |
| 1255 | LIST_HEAD(dispose); | ||
| 1256 | |||
| 1247 | dout("remove_session_caps on %p\n", session); | 1257 | dout("remove_session_caps on %p\n", session); |
| 1248 | iterate_session_caps(session, remove_session_caps_cb, fsc); | 1258 | iterate_session_caps(session, remove_session_caps_cb, fsc); |
| 1249 | 1259 | ||
| @@ -1278,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
| 1278 | } | 1288 | } |
| 1279 | 1289 | ||
| 1280 | // drop cap expires and unlock s_cap_lock | 1290 | // drop cap expires and unlock s_cap_lock |
| 1281 | cleanup_cap_releases(session->s_mdsc, session); | 1291 | detach_cap_releases(session, &dispose); |
| 1282 | 1292 | ||
| 1283 | BUG_ON(session->s_nr_caps > 0); | 1293 | BUG_ON(session->s_nr_caps > 0); |
| 1284 | BUG_ON(!list_empty(&session->s_cap_flushing)); | 1294 | BUG_ON(!list_empty(&session->s_cap_flushing)); |
| 1295 | spin_unlock(&session->s_cap_lock); | ||
| 1296 | dispose_cap_releases(session->s_mdsc, &dispose); | ||
| 1285 | } | 1297 | } |
| 1286 | 1298 | ||
| 1287 | /* | 1299 | /* |
| @@ -1462,6 +1474,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) | |||
| 1462 | goto out; | 1474 | goto out; |
| 1463 | if ((used | wanted) & CEPH_CAP_ANY_WR) | 1475 | if ((used | wanted) & CEPH_CAP_ANY_WR) |
| 1464 | goto out; | 1476 | goto out; |
| 1477 | /* Note: it's possible that i_filelock_ref becomes non-zero | ||
| 1478 | * after dropping auth caps. It doesn't hurt because reply | ||
| 1479 | * of lock mds request will re-add auth caps. */ | ||
| 1480 | if (atomic_read(&ci->i_filelock_ref) > 0) | ||
| 1481 | goto out; | ||
| 1465 | } | 1482 | } |
| 1466 | /* The inode has cached pages, but it's no longer used. | 1483 | /* The inode has cached pages, but it's no longer used. |
| 1467 | * we can safely drop it */ | 1484 | * we can safely drop it */ |
| @@ -2827,7 +2844,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2827 | struct ceph_mds_cap_reconnect v2; | 2844 | struct ceph_mds_cap_reconnect v2; |
| 2828 | struct ceph_mds_cap_reconnect_v1 v1; | 2845 | struct ceph_mds_cap_reconnect_v1 v1; |
| 2829 | } rec; | 2846 | } rec; |
| 2830 | struct ceph_inode_info *ci; | 2847 | struct ceph_inode_info *ci = cap->ci; |
| 2831 | struct ceph_reconnect_state *recon_state = arg; | 2848 | struct ceph_reconnect_state *recon_state = arg; |
| 2832 | struct ceph_pagelist *pagelist = recon_state->pagelist; | 2849 | struct ceph_pagelist *pagelist = recon_state->pagelist; |
| 2833 | char *path; | 2850 | char *path; |
| @@ -2836,8 +2853,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2836 | u64 snap_follows; | 2853 | u64 snap_follows; |
| 2837 | struct dentry *dentry; | 2854 | struct dentry *dentry; |
| 2838 | 2855 | ||
| 2839 | ci = cap->ci; | ||
| 2840 | |||
| 2841 | dout(" adding %p ino %llx.%llx cap %p %lld %s\n", | 2856 | dout(" adding %p ino %llx.%llx cap %p %lld %s\n", |
| 2842 | inode, ceph_vinop(inode), cap, cap->cap_id, | 2857 | inode, ceph_vinop(inode), cap, cap->cap_id, |
| 2843 | ceph_cap_string(cap->issued)); | 2858 | ceph_cap_string(cap->issued)); |
| @@ -2870,7 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2870 | rec.v2.issued = cpu_to_le32(cap->issued); | 2885 | rec.v2.issued = cpu_to_le32(cap->issued); |
| 2871 | rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | 2886 | rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); |
| 2872 | rec.v2.pathbase = cpu_to_le64(pathbase); | 2887 | rec.v2.pathbase = cpu_to_le64(pathbase); |
| 2873 | rec.v2.flock_len = 0; | 2888 | rec.v2.flock_len = (__force __le32) |
| 2889 | ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); | ||
| 2874 | } else { | 2890 | } else { |
| 2875 | rec.v1.cap_id = cpu_to_le64(cap->cap_id); | 2891 | rec.v1.cap_id = cpu_to_le64(cap->cap_id); |
| 2876 | rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | 2892 | rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); |
| @@ -2894,26 +2910,37 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2894 | 2910 | ||
| 2895 | if (recon_state->msg_version >= 2) { | 2911 | if (recon_state->msg_version >= 2) { |
| 2896 | int num_fcntl_locks, num_flock_locks; | 2912 | int num_fcntl_locks, num_flock_locks; |
| 2897 | struct ceph_filelock *flocks; | 2913 | struct ceph_filelock *flocks = NULL; |
| 2898 | size_t struct_len, total_len = 0; | 2914 | size_t struct_len, total_len = 0; |
| 2899 | u8 struct_v = 0; | 2915 | u8 struct_v = 0; |
| 2900 | 2916 | ||
| 2901 | encode_again: | 2917 | encode_again: |
| 2902 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2918 | if (rec.v2.flock_len) { |
| 2903 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * | 2919 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); |
| 2904 | sizeof(struct ceph_filelock), GFP_NOFS); | 2920 | } else { |
| 2905 | if (!flocks) { | 2921 | num_fcntl_locks = 0; |
| 2906 | err = -ENOMEM; | 2922 | num_flock_locks = 0; |
| 2907 | goto out_free; | ||
| 2908 | } | 2923 | } |
| 2909 | err = ceph_encode_locks_to_buffer(inode, flocks, | 2924 | if (num_fcntl_locks + num_flock_locks > 0) { |
| 2910 | num_fcntl_locks, | 2925 | flocks = kmalloc((num_fcntl_locks + num_flock_locks) * |
| 2911 | num_flock_locks); | 2926 | sizeof(struct ceph_filelock), GFP_NOFS); |
| 2912 | if (err) { | 2927 | if (!flocks) { |
| 2928 | err = -ENOMEM; | ||
| 2929 | goto out_free; | ||
| 2930 | } | ||
| 2931 | err = ceph_encode_locks_to_buffer(inode, flocks, | ||
| 2932 | num_fcntl_locks, | ||
| 2933 | num_flock_locks); | ||
| 2934 | if (err) { | ||
| 2935 | kfree(flocks); | ||
| 2936 | flocks = NULL; | ||
| 2937 | if (err == -ENOSPC) | ||
| 2938 | goto encode_again; | ||
| 2939 | goto out_free; | ||
| 2940 | } | ||
| 2941 | } else { | ||
| 2913 | kfree(flocks); | 2942 | kfree(flocks); |
| 2914 | if (err == -ENOSPC) | 2943 | flocks = NULL; |
| 2915 | goto encode_again; | ||
| 2916 | goto out_free; | ||
| 2917 | } | 2944 | } |
| 2918 | 2945 | ||
| 2919 | if (recon_state->msg_version >= 3) { | 2946 | if (recon_state->msg_version >= 3) { |
| @@ -2993,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2993 | int s_nr_caps; | 3020 | int s_nr_caps; |
| 2994 | struct ceph_pagelist *pagelist; | 3021 | struct ceph_pagelist *pagelist; |
| 2995 | struct ceph_reconnect_state recon_state; | 3022 | struct ceph_reconnect_state recon_state; |
| 3023 | LIST_HEAD(dispose); | ||
| 2996 | 3024 | ||
| 2997 | pr_info("mds%d reconnect start\n", mds); | 3025 | pr_info("mds%d reconnect start\n", mds); |
| 2998 | 3026 | ||
| @@ -3026,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 3026 | */ | 3054 | */ |
| 3027 | session->s_cap_reconnect = 1; | 3055 | session->s_cap_reconnect = 1; |
| 3028 | /* drop old cap expires; we're about to reestablish that state */ | 3056 | /* drop old cap expires; we're about to reestablish that state */ |
| 3029 | cleanup_cap_releases(mdsc, session); | 3057 | detach_cap_releases(session, &dispose); |
| 3058 | spin_unlock(&session->s_cap_lock); | ||
| 3059 | dispose_cap_releases(mdsc, &dispose); | ||
| 3030 | 3060 | ||
| 3031 | /* trim unused caps to reduce MDS's cache rejoin time */ | 3061 | /* trim unused caps to reduce MDS's cache rejoin time */ |
| 3032 | if (mdsc->fsc->sb->s_root) | 3062 | if (mdsc->fsc->sb->s_root) |
| @@ -3857,14 +3887,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
| 3857 | goto err_out; | 3887 | goto err_out; |
| 3858 | } | 3888 | } |
| 3859 | return; | 3889 | return; |
| 3890 | |||
| 3860 | bad: | 3891 | bad: |
| 3861 | pr_err("error decoding fsmap\n"); | 3892 | pr_err("error decoding fsmap\n"); |
| 3862 | err_out: | 3893 | err_out: |
| 3863 | mutex_lock(&mdsc->mutex); | 3894 | mutex_lock(&mdsc->mutex); |
| 3864 | mdsc->mdsmap_err = -ENOENT; | 3895 | mdsc->mdsmap_err = err; |
| 3865 | __wake_requests(mdsc, &mdsc->waiting_for_map); | 3896 | __wake_requests(mdsc, &mdsc->waiting_for_map); |
| 3866 | mutex_unlock(&mdsc->mutex); | 3897 | mutex_unlock(&mdsc->mutex); |
| 3867 | return; | ||
| 3868 | } | 3898 | } |
| 3869 | 3899 | ||
| 3870 | /* | 3900 | /* |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e4082afedcb1..fe9fbb3f13f7 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -84,8 +84,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 84 | buf->f_ffree = -1; | 84 | buf->f_ffree = -1; |
| 85 | buf->f_namelen = NAME_MAX; | 85 | buf->f_namelen = NAME_MAX; |
| 86 | 86 | ||
| 87 | /* leave fsid little-endian, regardless of host endianness */ | 87 | /* Must convert the fsid, for consistent values across arches */ |
| 88 | fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); | 88 | fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^ |
| 89 | le64_to_cpu(*((__le64 *)&monmap->fsid + 1)); | ||
| 89 | buf->f_fsid.val[0] = fsid & 0xffffffff; | 90 | buf->f_fsid.val[0] = fsid & 0xffffffff; |
| 90 | buf->f_fsid.val[1] = fsid >> 32; | 91 | buf->f_fsid.val[1] = fsid >> 32; |
| 91 | 92 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3e27a28aa44a..2beeec07fa76 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -352,6 +352,7 @@ struct ceph_inode_info { | |||
| 352 | int i_pin_ref; | 352 | int i_pin_ref; |
| 353 | int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; | 353 | int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; |
| 354 | int i_wrbuffer_ref, i_wrbuffer_ref_head; | 354 | int i_wrbuffer_ref, i_wrbuffer_ref_head; |
| 355 | atomic_t i_filelock_ref; | ||
| 355 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ | 356 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ |
| 356 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ | 357 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ |
| 357 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ | 358 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ |
| @@ -487,6 +488,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, | |||
| 487 | #define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ | 488 | #define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ |
| 488 | #define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ | 489 | #define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ |
| 489 | #define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */ | 490 | #define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */ |
| 491 | #define CEPH_I_ERROR_FILELOCK (1 << 12) /* have seen file lock errors */ | ||
| 492 | |||
| 490 | 493 | ||
| 491 | /* | 494 | /* |
| 492 | * We set the ERROR_WRITE bit when we start seeing write errors on an inode | 495 | * We set the ERROR_WRITE bit when we start seeing write errors on an inode |
| @@ -1011,7 +1014,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode, | |||
| 1011 | extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, | 1014 | extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, |
| 1012 | struct ceph_pagelist *pagelist, | 1015 | struct ceph_pagelist *pagelist, |
| 1013 | int num_fcntl_locks, int num_flock_locks); | 1016 | int num_fcntl_locks, int num_flock_locks); |
| 1014 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); | ||
| 1015 | 1017 | ||
| 1016 | /* debugfs.c */ | 1018 | /* debugfs.c */ |
| 1017 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | 1019 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); |
diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 67bb1f11e613..9a5850f264ed 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c | |||
| @@ -47,28 +47,38 @@ unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length) | |||
| 47 | 47 | ||
| 48 | /* handle the last 11 bytes */ | 48 | /* handle the last 11 bytes */ |
| 49 | c = c + length; | 49 | c = c + length; |
| 50 | switch (len) { /* all the case statements fall through */ | 50 | switch (len) { |
| 51 | case 11: | 51 | case 11: |
| 52 | c = c + ((__u32)k[10] << 24); | 52 | c = c + ((__u32)k[10] << 24); |
| 53 | /* fall through */ | ||
| 53 | case 10: | 54 | case 10: |
| 54 | c = c + ((__u32)k[9] << 16); | 55 | c = c + ((__u32)k[9] << 16); |
| 56 | /* fall through */ | ||
| 55 | case 9: | 57 | case 9: |
| 56 | c = c + ((__u32)k[8] << 8); | 58 | c = c + ((__u32)k[8] << 8); |
| 57 | /* the first byte of c is reserved for the length */ | 59 | /* the first byte of c is reserved for the length */ |
| 60 | /* fall through */ | ||
| 58 | case 8: | 61 | case 8: |
| 59 | b = b + ((__u32)k[7] << 24); | 62 | b = b + ((__u32)k[7] << 24); |
| 63 | /* fall through */ | ||
| 60 | case 7: | 64 | case 7: |
| 61 | b = b + ((__u32)k[6] << 16); | 65 | b = b + ((__u32)k[6] << 16); |
| 66 | /* fall through */ | ||
| 62 | case 6: | 67 | case 6: |
| 63 | b = b + ((__u32)k[5] << 8); | 68 | b = b + ((__u32)k[5] << 8); |
| 69 | /* fall through */ | ||
| 64 | case 5: | 70 | case 5: |
| 65 | b = b + k[4]; | 71 | b = b + k[4]; |
| 72 | /* fall through */ | ||
| 66 | case 4: | 73 | case 4: |
| 67 | a = a + ((__u32)k[3] << 24); | 74 | a = a + ((__u32)k[3] << 24); |
| 75 | /* fall through */ | ||
| 68 | case 3: | 76 | case 3: |
| 69 | a = a + ((__u32)k[2] << 16); | 77 | a = a + ((__u32)k[2] << 16); |
| 78 | /* fall through */ | ||
| 70 | case 2: | 79 | case 2: |
| 71 | a = a + ((__u32)k[1] << 8); | 80 | a = a + ((__u32)k[1] << 8); |
| 81 | /* fall through */ | ||
| 72 | case 1: | 82 | case 1: |
| 73 | a = a + k[0]; | 83 | a = a + k[0]; |
| 74 | /* case 0: nothing left to add */ | 84 | /* case 0: nothing left to add */ |
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 489610ac1cdd..bf9d079cbafd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
| @@ -37,7 +37,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf) | |||
| 37 | return -ENOTSUPP; | 37 | return -ENOTSUPP; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | WARN_ON(!key->len); | 40 | if (!key->len) |
| 41 | return -EINVAL; | ||
| 42 | |||
| 41 | key->key = kmemdup(buf, key->len, GFP_NOIO); | 43 | key->key = kmemdup(buf, key->len, GFP_NOIO); |
| 42 | if (!key->key) { | 44 | if (!key->key) { |
| 43 | ret = -ENOMEM; | 45 | ret = -ENOMEM; |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad93342c90d7..8a4d3758030b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -430,6 +430,7 @@ static void ceph_sock_state_change(struct sock *sk) | |||
| 430 | switch (sk->sk_state) { | 430 | switch (sk->sk_state) { |
| 431 | case TCP_CLOSE: | 431 | case TCP_CLOSE: |
| 432 | dout("%s TCP_CLOSE\n", __func__); | 432 | dout("%s TCP_CLOSE\n", __func__); |
| 433 | /* fall through */ | ||
| 433 | case TCP_CLOSE_WAIT: | 434 | case TCP_CLOSE_WAIT: |
| 434 | dout("%s TCP_CLOSE_WAIT\n", __func__); | 435 | dout("%s TCP_CLOSE_WAIT\n", __func__); |
| 435 | con_sock_state_closing(con); | 436 | con_sock_state_closing(con); |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9ae1bab8c05d..1547107f4854 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -1279,9 +1279,10 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
| 1279 | 1279 | ||
| 1280 | /* | 1280 | /* |
| 1281 | * Older OSDs don't set reply tid even if the orignal | 1281 | * Older OSDs don't set reply tid even if the orignal |
| 1282 | * request had a non-zero tid. Workaround this weirdness | 1282 | * request had a non-zero tid. Work around this weirdness |
| 1283 | * by falling through to the allocate case. | 1283 | * by allocating a new message. |
| 1284 | */ | 1284 | */ |
| 1285 | /* fall through */ | ||
| 1285 | case CEPH_MSG_MON_MAP: | 1286 | case CEPH_MSG_MON_MAP: |
| 1286 | case CEPH_MSG_MDS_MAP: | 1287 | case CEPH_MSG_MDS_MAP: |
| 1287 | case CEPH_MSG_OSD_MAP: | 1288 | case CEPH_MSG_OSD_MAP: |
