diff options
author | Yan, Zheng <zyan@redhat.com> | 2016-03-06 20:35:06 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2016-07-27 20:55:38 -0400 |
commit | 779fe0fb8e1883d5c479ac6bd85fbd237deed1f7 (patch) | |
tree | ce9876a5f4201a7aa9f2daef838ccf7ff1827128 | |
parent | cd08e0a274ba6215b79c83809b331e8af17196ba (diff) |
ceph: rados pool namespace support
This patch adds code that decodes the pool namespace information carried
in cap messages and request replies. The pool namespace is saved in i_layout
and is passed to libceph when doing reads/writes.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r-- | fs/ceph/addr.c | 67 | ||||
-rw-r--r-- | fs/ceph/caps.c | 46 | ||||
-rw-r--r-- | fs/ceph/inode.c | 20 | ||||
-rw-r--r-- | fs/ceph/ioctl.c | 3 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 19 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 77 |
8 files changed, 159 insertions, 77 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3f8efd866fec..d5b6f959a3c3 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1730,7 +1730,8 @@ enum { | |||
1730 | POOL_WRITE = 2, | 1730 | POOL_WRITE = 2, |
1731 | }; | 1731 | }; |
1732 | 1732 | ||
1733 | static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | 1733 | static int __ceph_pool_perm_get(struct ceph_inode_info *ci, |
1734 | s64 pool, struct ceph_string *pool_ns) | ||
1734 | { | 1735 | { |
1735 | struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); | 1736 | struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); |
1736 | struct ceph_mds_client *mdsc = fsc->mdsc; | 1737 | struct ceph_mds_client *mdsc = fsc->mdsc; |
@@ -1738,6 +1739,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | |||
1738 | struct rb_node **p, *parent; | 1739 | struct rb_node **p, *parent; |
1739 | struct ceph_pool_perm *perm; | 1740 | struct ceph_pool_perm *perm; |
1740 | struct page **pages; | 1741 | struct page **pages; |
1742 | size_t pool_ns_len; | ||
1741 | int err = 0, err2 = 0, have = 0; | 1743 | int err = 0, err2 = 0, have = 0; |
1742 | 1744 | ||
1743 | down_read(&mdsc->pool_perm_rwsem); | 1745 | down_read(&mdsc->pool_perm_rwsem); |
@@ -1749,17 +1751,31 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | |||
1749 | else if (pool > perm->pool) | 1751 | else if (pool > perm->pool) |
1750 | p = &(*p)->rb_right; | 1752 | p = &(*p)->rb_right; |
1751 | else { | 1753 | else { |
1752 | have = perm->perm; | 1754 | int ret = ceph_compare_string(pool_ns, |
1753 | break; | 1755 | perm->pool_ns, |
1756 | perm->pool_ns_len); | ||
1757 | if (ret < 0) | ||
1758 | p = &(*p)->rb_left; | ||
1759 | else if (ret > 0) | ||
1760 | p = &(*p)->rb_right; | ||
1761 | else { | ||
1762 | have = perm->perm; | ||
1763 | break; | ||
1764 | } | ||
1754 | } | 1765 | } |
1755 | } | 1766 | } |
1756 | up_read(&mdsc->pool_perm_rwsem); | 1767 | up_read(&mdsc->pool_perm_rwsem); |
1757 | if (*p) | 1768 | if (*p) |
1758 | goto out; | 1769 | goto out; |
1759 | 1770 | ||
1760 | dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool); | 1771 | if (pool_ns) |
1772 | dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n", | ||
1773 | pool, (int)pool_ns->len, pool_ns->str); | ||
1774 | else | ||
1775 | dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool); | ||
1761 | 1776 | ||
1762 | down_write(&mdsc->pool_perm_rwsem); | 1777 | down_write(&mdsc->pool_perm_rwsem); |
1778 | p = &mdsc->pool_perm_tree.rb_node; | ||
1763 | parent = NULL; | 1779 | parent = NULL; |
1764 | while (*p) { | 1780 | while (*p) { |
1765 | parent = *p; | 1781 | parent = *p; |
@@ -1769,8 +1785,17 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | |||
1769 | else if (pool > perm->pool) | 1785 | else if (pool > perm->pool) |
1770 | p = &(*p)->rb_right; | 1786 | p = &(*p)->rb_right; |
1771 | else { | 1787 | else { |
1772 | have = perm->perm; | 1788 | int ret = ceph_compare_string(pool_ns, |
1773 | break; | 1789 | perm->pool_ns, |
1790 | perm->pool_ns_len); | ||
1791 | if (ret < 0) | ||
1792 | p = &(*p)->rb_left; | ||
1793 | else if (ret > 0) | ||
1794 | p = &(*p)->rb_right; | ||
1795 | else { | ||
1796 | have = perm->perm; | ||
1797 | break; | ||
1798 | } | ||
1774 | } | 1799 | } |
1775 | } | 1800 | } |
1776 | if (*p) { | 1801 | if (*p) { |
@@ -1788,6 +1813,8 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | |||
1788 | rd_req->r_flags = CEPH_OSD_FLAG_READ; | 1813 | rd_req->r_flags = CEPH_OSD_FLAG_READ; |
1789 | osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); | 1814 | osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); |
1790 | rd_req->r_base_oloc.pool = pool; | 1815 | rd_req->r_base_oloc.pool = pool; |
1816 | if (pool_ns) | ||
1817 | rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns); | ||
1791 | ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino); | 1818 | ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino); |
1792 | 1819 | ||
1793 | err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS); | 1820 | err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS); |
@@ -1841,7 +1868,8 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | |||
1841 | goto out_unlock; | 1868 | goto out_unlock; |
1842 | } | 1869 | } |
1843 | 1870 | ||
1844 | perm = kmalloc(sizeof(*perm), GFP_NOFS); | 1871 | pool_ns_len = pool_ns ? pool_ns->len : 0; |
1872 | perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS); | ||
1845 | if (!perm) { | 1873 | if (!perm) { |
1846 | err = -ENOMEM; | 1874 | err = -ENOMEM; |
1847 | goto out_unlock; | 1875 | goto out_unlock; |
@@ -1849,6 +1877,11 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool) | |||
1849 | 1877 | ||
1850 | perm->pool = pool; | 1878 | perm->pool = pool; |
1851 | perm->perm = have; | 1879 | perm->perm = have; |
1880 | perm->pool_ns_len = pool_ns_len; | ||
1881 | if (pool_ns_len > 0) | ||
1882 | memcpy(perm->pool_ns, pool_ns->str, pool_ns_len); | ||
1883 | perm->pool_ns[pool_ns_len] = 0; | ||
1884 | |||
1852 | rb_link_node(&perm->node, parent, p); | 1885 | rb_link_node(&perm->node, parent, p); |
1853 | rb_insert_color(&perm->node, &mdsc->pool_perm_tree); | 1886 | rb_insert_color(&perm->node, &mdsc->pool_perm_tree); |
1854 | err = 0; | 1887 | err = 0; |
@@ -1860,19 +1893,20 @@ out_unlock: | |||
1860 | out: | 1893 | out: |
1861 | if (!err) | 1894 | if (!err) |
1862 | err = have; | 1895 | err = have; |
1863 | dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err); | 1896 | if (pool_ns) |
1897 | dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n", | ||
1898 | pool, (int)pool_ns->len, pool_ns->str, err); | ||
1899 | else | ||
1900 | dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err); | ||
1864 | return err; | 1901 | return err; |
1865 | } | 1902 | } |
1866 | 1903 | ||
1867 | int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) | 1904 | int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) |
1868 | { | 1905 | { |
1869 | s64 pool; | 1906 | s64 pool; |
1907 | struct ceph_string *pool_ns; | ||
1870 | int ret, flags; | 1908 | int ret, flags; |
1871 | 1909 | ||
1872 | /* does not support pool namespace yet */ | ||
1873 | if (ci->i_pool_ns_len) | ||
1874 | return -EIO; | ||
1875 | |||
1876 | if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), | 1910 | if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), |
1877 | NOPOOLPERM)) | 1911 | NOPOOLPERM)) |
1878 | return 0; | 1912 | return 0; |
@@ -1896,7 +1930,9 @@ check: | |||
1896 | return 0; | 1930 | return 0; |
1897 | } | 1931 | } |
1898 | 1932 | ||
1899 | ret = __ceph_pool_perm_get(ci, pool); | 1933 | pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); |
1934 | ret = __ceph_pool_perm_get(ci, pool, pool_ns); | ||
1935 | ceph_put_string(pool_ns); | ||
1900 | if (ret < 0) | 1936 | if (ret < 0) |
1901 | return ret; | 1937 | return ret; |
1902 | 1938 | ||
@@ -1907,8 +1943,9 @@ check: | |||
1907 | flags |= CEPH_I_POOL_WR; | 1943 | flags |= CEPH_I_POOL_WR; |
1908 | 1944 | ||
1909 | spin_lock(&ci->i_ceph_lock); | 1945 | spin_lock(&ci->i_ceph_lock); |
1910 | if (pool == ci->i_layout.pool_id) { | 1946 | if (pool == ci->i_layout.pool_id && |
1911 | ci->i_ceph_flags = flags; | 1947 | pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) { |
1948 | ci->i_ceph_flags |= flags; | ||
1912 | } else { | 1949 | } else { |
1913 | pool = ci->i_layout.pool_id; | 1950 | pool = ci->i_layout.pool_id; |
1914 | flags = ci->i_ceph_flags; | 1951 | flags = ci->i_ceph_flags; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f24722dce167..0a9406a8a794 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2779,12 +2779,11 @@ static void invalidate_aliases(struct inode *inode) | |||
2779 | */ | 2779 | */ |
2780 | static void handle_cap_grant(struct ceph_mds_client *mdsc, | 2780 | static void handle_cap_grant(struct ceph_mds_client *mdsc, |
2781 | struct inode *inode, struct ceph_mds_caps *grant, | 2781 | struct inode *inode, struct ceph_mds_caps *grant, |
2782 | u64 inline_version, | 2782 | struct ceph_string **pns, u64 inline_version, |
2783 | void *inline_data, int inline_len, | 2783 | void *inline_data, u32 inline_len, |
2784 | struct ceph_buffer *xattr_buf, | 2784 | struct ceph_buffer *xattr_buf, |
2785 | struct ceph_mds_session *session, | 2785 | struct ceph_mds_session *session, |
2786 | struct ceph_cap *cap, int issued, | 2786 | struct ceph_cap *cap, int issued) |
2787 | u32 pool_ns_len) | ||
2788 | __releases(ci->i_ceph_lock) | 2787 | __releases(ci->i_ceph_lock) |
2789 | __releases(mdsc->snap_rwsem) | 2788 | __releases(mdsc->snap_rwsem) |
2790 | { | 2789 | { |
@@ -2896,11 +2895,18 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, | |||
2896 | if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { | 2895 | if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { |
2897 | /* file layout may have changed */ | 2896 | /* file layout may have changed */ |
2898 | s64 old_pool = ci->i_layout.pool_id; | 2897 | s64 old_pool = ci->i_layout.pool_id; |
2898 | struct ceph_string *old_ns; | ||
2899 | |||
2899 | ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout); | 2900 | ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout); |
2900 | ci->i_pool_ns_len = pool_ns_len; | 2901 | old_ns = rcu_dereference_protected(ci->i_layout.pool_ns, |
2901 | if (ci->i_layout.pool_id != old_pool) | 2902 | lockdep_is_held(&ci->i_ceph_lock)); |
2903 | rcu_assign_pointer(ci->i_layout.pool_ns, *pns); | ||
2904 | |||
2905 | if (ci->i_layout.pool_id != old_pool || *pns != old_ns) | ||
2902 | ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; | 2906 | ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; |
2903 | 2907 | ||
2908 | *pns = old_ns; | ||
2909 | |||
2904 | /* size/truncate_seq? */ | 2910 | /* size/truncate_seq? */ |
2905 | queue_trunc = ceph_fill_file_size(inode, issued, | 2911 | queue_trunc = ceph_fill_file_size(inode, issued, |
2906 | le32_to_cpu(grant->truncate_seq), | 2912 | le32_to_cpu(grant->truncate_seq), |
@@ -3423,20 +3429,18 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3423 | struct ceph_cap *cap; | 3429 | struct ceph_cap *cap; |
3424 | struct ceph_mds_caps *h; | 3430 | struct ceph_mds_caps *h; |
3425 | struct ceph_mds_cap_peer *peer = NULL; | 3431 | struct ceph_mds_cap_peer *peer = NULL; |
3426 | struct ceph_snap_realm *realm; | 3432 | struct ceph_snap_realm *realm = NULL; |
3433 | struct ceph_string *pool_ns = NULL; | ||
3427 | int mds = session->s_mds; | 3434 | int mds = session->s_mds; |
3428 | int op, issued; | 3435 | int op, issued; |
3429 | u32 seq, mseq; | 3436 | u32 seq, mseq; |
3430 | struct ceph_vino vino; | 3437 | struct ceph_vino vino; |
3431 | u64 cap_id; | ||
3432 | u64 size, max_size; | ||
3433 | u64 tid; | 3438 | u64 tid; |
3434 | u64 inline_version = 0; | 3439 | u64 inline_version = 0; |
3435 | void *inline_data = NULL; | 3440 | void *inline_data = NULL; |
3436 | u32 inline_len = 0; | 3441 | u32 inline_len = 0; |
3437 | void *snaptrace; | 3442 | void *snaptrace; |
3438 | size_t snaptrace_len; | 3443 | size_t snaptrace_len; |
3439 | u32 pool_ns_len = 0; | ||
3440 | void *p, *end; | 3444 | void *p, *end; |
3441 | 3445 | ||
3442 | dout("handle_caps from mds%d\n", mds); | 3446 | dout("handle_caps from mds%d\n", mds); |
@@ -3450,11 +3454,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3450 | op = le32_to_cpu(h->op); | 3454 | op = le32_to_cpu(h->op); |
3451 | vino.ino = le64_to_cpu(h->ino); | 3455 | vino.ino = le64_to_cpu(h->ino); |
3452 | vino.snap = CEPH_NOSNAP; | 3456 | vino.snap = CEPH_NOSNAP; |
3453 | cap_id = le64_to_cpu(h->cap_id); | ||
3454 | seq = le32_to_cpu(h->seq); | 3457 | seq = le32_to_cpu(h->seq); |
3455 | mseq = le32_to_cpu(h->migrate_seq); | 3458 | mseq = le32_to_cpu(h->migrate_seq); |
3456 | size = le64_to_cpu(h->size); | ||
3457 | max_size = le64_to_cpu(h->max_size); | ||
3458 | 3459 | ||
3459 | snaptrace = h + 1; | 3460 | snaptrace = h + 1; |
3460 | snaptrace_len = le32_to_cpu(h->snap_trace_len); | 3461 | snaptrace_len = le32_to_cpu(h->snap_trace_len); |
@@ -3493,6 +3494,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3493 | u64 flush_tid; | 3494 | u64 flush_tid; |
3494 | u32 caller_uid, caller_gid; | 3495 | u32 caller_uid, caller_gid; |
3495 | u32 osd_epoch_barrier; | 3496 | u32 osd_epoch_barrier; |
3497 | u32 pool_ns_len; | ||
3496 | /* version >= 5 */ | 3498 | /* version >= 5 */ |
3497 | ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad); | 3499 | ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad); |
3498 | /* version >= 6 */ | 3500 | /* version >= 6 */ |
@@ -3502,6 +3504,11 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3502 | ceph_decode_32_safe(&p, end, caller_gid, bad); | 3504 | ceph_decode_32_safe(&p, end, caller_gid, bad); |
3503 | /* version >= 8 */ | 3505 | /* version >= 8 */ |
3504 | ceph_decode_32_safe(&p, end, pool_ns_len, bad); | 3506 | ceph_decode_32_safe(&p, end, pool_ns_len, bad); |
3507 | if (pool_ns_len > 0) { | ||
3508 | ceph_decode_need(&p, end, pool_ns_len, bad); | ||
3509 | pool_ns = ceph_find_or_create_string(p, pool_ns_len); | ||
3510 | p += pool_ns_len; | ||
3511 | } | ||
3505 | } | 3512 | } |
3506 | 3513 | ||
3507 | /* lookup ino */ | 3514 | /* lookup ino */ |
@@ -3522,7 +3529,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3522 | cap = ceph_get_cap(mdsc, NULL); | 3529 | cap = ceph_get_cap(mdsc, NULL); |
3523 | cap->cap_ino = vino.ino; | 3530 | cap->cap_ino = vino.ino; |
3524 | cap->queue_release = 1; | 3531 | cap->queue_release = 1; |
3525 | cap->cap_id = cap_id; | 3532 | cap->cap_id = le64_to_cpu(h->cap_id); |
3526 | cap->mseq = mseq; | 3533 | cap->mseq = mseq; |
3527 | cap->seq = seq; | 3534 | cap->seq = seq; |
3528 | spin_lock(&session->s_cap_lock); | 3535 | spin_lock(&session->s_cap_lock); |
@@ -3557,10 +3564,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3557 | } | 3564 | } |
3558 | handle_cap_import(mdsc, inode, h, peer, session, | 3565 | handle_cap_import(mdsc, inode, h, peer, session, |
3559 | &cap, &issued); | 3566 | &cap, &issued); |
3560 | handle_cap_grant(mdsc, inode, h, | 3567 | handle_cap_grant(mdsc, inode, h, &pool_ns, |
3561 | inline_version, inline_data, inline_len, | 3568 | inline_version, inline_data, inline_len, |
3562 | msg->middle, session, cap, issued, | 3569 | msg->middle, session, cap, issued); |
3563 | pool_ns_len); | ||
3564 | if (realm) | 3570 | if (realm) |
3565 | ceph_put_snap_realm(mdsc, realm); | 3571 | ceph_put_snap_realm(mdsc, realm); |
3566 | goto done_unlocked; | 3572 | goto done_unlocked; |
@@ -3582,10 +3588,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3582 | case CEPH_CAP_OP_GRANT: | 3588 | case CEPH_CAP_OP_GRANT: |
3583 | __ceph_caps_issued(ci, &issued); | 3589 | __ceph_caps_issued(ci, &issued); |
3584 | issued |= __ceph_caps_dirty(ci); | 3590 | issued |= __ceph_caps_dirty(ci); |
3585 | handle_cap_grant(mdsc, inode, h, | 3591 | handle_cap_grant(mdsc, inode, h, &pool_ns, |
3586 | inline_version, inline_data, inline_len, | 3592 | inline_version, inline_data, inline_len, |
3587 | msg->middle, session, cap, issued, | 3593 | msg->middle, session, cap, issued); |
3588 | pool_ns_len); | ||
3589 | goto done_unlocked; | 3594 | goto done_unlocked; |
3590 | 3595 | ||
3591 | case CEPH_CAP_OP_FLUSH_ACK: | 3596 | case CEPH_CAP_OP_FLUSH_ACK: |
@@ -3616,6 +3621,7 @@ done: | |||
3616 | mutex_unlock(&session->s_mutex); | 3621 | mutex_unlock(&session->s_mutex); |
3617 | done_unlocked: | 3622 | done_unlocked: |
3618 | iput(inode); | 3623 | iput(inode); |
3624 | ceph_put_string(pool_ns); | ||
3619 | return; | 3625 | return; |
3620 | 3626 | ||
3621 | bad: | 3627 | bad: |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d035e0ab6029..dc032566ed71 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -447,7 +447,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
447 | 447 | ||
448 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); | 448 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); |
449 | RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); | 449 | RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); |
450 | ci->i_pool_ns_len = 0; | ||
451 | 450 | ||
452 | ci->i_fragtree = RB_ROOT; | 451 | ci->i_fragtree = RB_ROOT; |
453 | mutex_init(&ci->i_fragtree_mutex); | 452 | mutex_init(&ci->i_fragtree_mutex); |
@@ -571,7 +570,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
571 | if (ci->i_xattrs.prealloc_blob) | 570 | if (ci->i_xattrs.prealloc_blob) |
572 | ceph_buffer_put(ci->i_xattrs.prealloc_blob); | 571 | ceph_buffer_put(ci->i_xattrs.prealloc_blob); |
573 | 572 | ||
574 | ceph_put_string(ci->i_layout.pool_ns); | 573 | ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns)); |
575 | 574 | ||
576 | call_rcu(&inode->i_rcu, ceph_i_callback); | 575 | call_rcu(&inode->i_rcu, ceph_i_callback); |
577 | } | 576 | } |
@@ -736,6 +735,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
736 | int issued = 0, implemented, new_issued; | 735 | int issued = 0, implemented, new_issued; |
737 | struct timespec mtime, atime, ctime; | 736 | struct timespec mtime, atime, ctime; |
738 | struct ceph_buffer *xattr_blob = NULL; | 737 | struct ceph_buffer *xattr_blob = NULL; |
738 | struct ceph_string *pool_ns = NULL; | ||
739 | struct ceph_cap *new_cap = NULL; | 739 | struct ceph_cap *new_cap = NULL; |
740 | int err = 0; | 740 | int err = 0; |
741 | bool wake = false; | 741 | bool wake = false; |
@@ -763,6 +763,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
763 | iinfo->xattr_len); | 763 | iinfo->xattr_len); |
764 | } | 764 | } |
765 | 765 | ||
766 | if (iinfo->pool_ns_len > 0) | ||
767 | pool_ns = ceph_find_or_create_string(iinfo->pool_ns_data, | ||
768 | iinfo->pool_ns_len); | ||
769 | |||
766 | spin_lock(&ci->i_ceph_lock); | 770 | spin_lock(&ci->i_ceph_lock); |
767 | 771 | ||
768 | /* | 772 | /* |
@@ -818,11 +822,18 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
818 | if (new_version || | 822 | if (new_version || |
819 | (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { | 823 | (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { |
820 | s64 old_pool = ci->i_layout.pool_id; | 824 | s64 old_pool = ci->i_layout.pool_id; |
825 | struct ceph_string *old_ns; | ||
826 | |||
821 | ceph_file_layout_from_legacy(&ci->i_layout, &info->layout); | 827 | ceph_file_layout_from_legacy(&ci->i_layout, &info->layout); |
822 | ci->i_pool_ns_len = iinfo->pool_ns_len; | 828 | old_ns = rcu_dereference_protected(ci->i_layout.pool_ns, |
823 | if (ci->i_layout.pool_id != old_pool) | 829 | lockdep_is_held(&ci->i_ceph_lock)); |
830 | rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns); | ||
831 | |||
832 | if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) | ||
824 | ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; | 833 | ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; |
825 | 834 | ||
835 | pool_ns = old_ns; | ||
836 | |||
826 | queue_trunc = ceph_fill_file_size(inode, issued, | 837 | queue_trunc = ceph_fill_file_size(inode, issued, |
827 | le32_to_cpu(info->truncate_seq), | 838 | le32_to_cpu(info->truncate_seq), |
828 | le64_to_cpu(info->truncate_size), | 839 | le64_to_cpu(info->truncate_size), |
@@ -989,6 +1000,7 @@ out: | |||
989 | ceph_put_cap(mdsc, new_cap); | 1000 | ceph_put_cap(mdsc, new_cap); |
990 | if (xattr_blob) | 1001 | if (xattr_blob) |
991 | ceph_buffer_put(xattr_blob); | 1002 | ceph_buffer_put(xattr_blob); |
1003 | ceph_put_string(pool_ns); | ||
992 | return err; | 1004 | return err; |
993 | } | 1005 | } |
994 | 1006 | ||
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 843dd31a02cd..6a30101b55ef 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -213,9 +213,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
213 | ceph_ino(inode), dl.object_no); | 213 | ceph_ino(inode), dl.object_no); |
214 | 214 | ||
215 | oloc.pool = ci->i_layout.pool_id; | 215 | oloc.pool = ci->i_layout.pool_id; |
216 | oloc.pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); | ||
216 | ceph_oid_printf(&oid, "%s", dl.object_name); | 217 | ceph_oid_printf(&oid, "%s", dl.object_name); |
217 | 218 | ||
218 | r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid); | 219 | r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid); |
220 | |||
221 | ceph_oloc_destroy(&oloc); | ||
219 | if (r < 0) { | 222 | if (r < 0) { |
220 | up_read(&osdc->lock); | 223 | up_read(&osdc->lock); |
221 | return r; | 224 | return r; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2103b823bec0..46641bbc8056 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -100,12 +100,15 @@ static int parse_reply_info_in(void **p, void *end, | |||
100 | } else | 100 | } else |
101 | info->inline_version = CEPH_INLINE_NONE; | 101 | info->inline_version = CEPH_INLINE_NONE; |
102 | 102 | ||
103 | info->pool_ns_len = 0; | ||
104 | info->pool_ns_data = NULL; | ||
103 | if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { | 105 | if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { |
104 | ceph_decode_32_safe(p, end, info->pool_ns_len, bad); | 106 | ceph_decode_32_safe(p, end, info->pool_ns_len, bad); |
105 | ceph_decode_need(p, end, info->pool_ns_len, bad); | 107 | if (info->pool_ns_len > 0) { |
106 | *p += info->pool_ns_len; | 108 | ceph_decode_need(p, end, info->pool_ns_len, bad); |
107 | } else { | 109 | info->pool_ns_data = *p; |
108 | info->pool_ns_len = 0; | 110 | *p += info->pool_ns_len; |
111 | } | ||
109 | } | 112 | } |
110 | 113 | ||
111 | return 0; | 114 | return 0; |
@@ -2292,14 +2295,6 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2292 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), | 2295 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), |
2293 | CEPH_CAP_PIN); | 2296 | CEPH_CAP_PIN); |
2294 | 2297 | ||
2295 | /* deny access to directories with pool_ns layouts */ | ||
2296 | if (req->r_inode && S_ISDIR(req->r_inode->i_mode) && | ||
2297 | ceph_inode(req->r_inode)->i_pool_ns_len) | ||
2298 | return -EIO; | ||
2299 | if (req->r_locked_dir && | ||
2300 | ceph_inode(req->r_locked_dir)->i_pool_ns_len) | ||
2301 | return -EIO; | ||
2302 | |||
2303 | /* issue */ | 2298 | /* issue */ |
2304 | mutex_lock(&mdsc->mutex); | 2299 | mutex_lock(&mdsc->mutex); |
2305 | __register_request(mdsc, req, dir); | 2300 | __register_request(mdsc, req, dir); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 75ecf967d0b7..2ce8e9f9bfc9 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -45,6 +45,7 @@ struct ceph_mds_reply_info_in { | |||
45 | u32 inline_len; | 45 | u32 inline_len; |
46 | char *inline_data; | 46 | char *inline_data; |
47 | u32 pool_ns_len; | 47 | u32 pool_ns_len; |
48 | char *pool_ns_data; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | struct ceph_mds_reply_dir_entry { | 51 | struct ceph_mds_reply_dir_entry { |
@@ -277,6 +278,8 @@ struct ceph_pool_perm { | |||
277 | struct rb_node node; | 278 | struct rb_node node; |
278 | int perm; | 279 | int perm; |
279 | s64 pool; | 280 | s64 pool; |
281 | size_t pool_ns_len; | ||
282 | char pool_ns[]; | ||
280 | }; | 283 | }; |
281 | 284 | ||
282 | /* | 285 | /* |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0168b49fb6ad..7ceab18c8ee2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -287,7 +287,6 @@ struct ceph_inode_info { | |||
287 | 287 | ||
288 | struct ceph_dir_layout i_dir_layout; | 288 | struct ceph_dir_layout i_dir_layout; |
289 | struct ceph_file_layout i_layout; | 289 | struct ceph_file_layout i_layout; |
290 | size_t i_pool_ns_len; | ||
291 | char *i_symlink; | 290 | char *i_symlink; |
292 | 291 | ||
293 | /* for dirs */ | 292 | /* for dirs */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5377c9c7a0c5..adc231892b0d 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -57,56 +57,69 @@ struct ceph_vxattr { | |||
57 | 57 | ||
58 | static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) | 58 | static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) |
59 | { | 59 | { |
60 | size_t s; | 60 | struct ceph_file_layout *fl = &ci->i_layout; |
61 | char *p = (char *)&ci->i_layout; | 61 | return (fl->stripe_unit > 0 || fl->stripe_count > 0 || |
62 | 62 | fl->object_size > 0 || fl->pool_id >= 0 || | |
63 | for (s = 0; s < sizeof(ci->i_layout); s++, p++) | 63 | rcu_dereference_raw(fl->pool_ns) != NULL); |
64 | if (*p) | ||
65 | return true; | ||
66 | return false; | ||
67 | } | 64 | } |
68 | 65 | ||
69 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | 66 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, |
70 | size_t size) | 67 | size_t size) |
71 | { | 68 | { |
72 | int ret; | ||
73 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); | 69 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); |
74 | struct ceph_osd_client *osdc = &fsc->client->osdc; | 70 | struct ceph_osd_client *osdc = &fsc->client->osdc; |
71 | struct ceph_string *pool_ns; | ||
75 | s64 pool = ci->i_layout.pool_id; | 72 | s64 pool = ci->i_layout.pool_id; |
76 | const char *pool_name; | 73 | const char *pool_name; |
74 | const char *ns_field = " pool_namespace="; | ||
77 | char buf[128]; | 75 | char buf[128]; |
76 | size_t len, total_len = 0; | ||
77 | int ret; | ||
78 | |||
79 | pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); | ||
78 | 80 | ||
79 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); | 81 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); |
80 | down_read(&osdc->lock); | 82 | down_read(&osdc->lock); |
81 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); | 83 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); |
82 | if (pool_name) { | 84 | if (pool_name) { |
83 | size_t len = strlen(pool_name); | 85 | len = snprintf(buf, sizeof(buf), |
84 | ret = snprintf(buf, sizeof(buf), | ||
85 | "stripe_unit=%u stripe_count=%u object_size=%u pool=", | 86 | "stripe_unit=%u stripe_count=%u object_size=%u pool=", |
86 | ci->i_layout.stripe_unit, ci->i_layout.stripe_count, | 87 | ci->i_layout.stripe_unit, ci->i_layout.stripe_count, |
87 | ci->i_layout.object_size); | 88 | ci->i_layout.object_size); |
88 | if (!size) { | 89 | total_len = len + strlen(pool_name); |
89 | ret += len; | ||
90 | } else if (ret + len > size) { | ||
91 | ret = -ERANGE; | ||
92 | } else { | ||
93 | memcpy(val, buf, ret); | ||
94 | memcpy(val + ret, pool_name, len); | ||
95 | ret += len; | ||
96 | } | ||
97 | } else { | 90 | } else { |
98 | ret = snprintf(buf, sizeof(buf), | 91 | len = snprintf(buf, sizeof(buf), |
99 | "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld", | 92 | "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld", |
100 | ci->i_layout.stripe_unit, ci->i_layout.stripe_count, | 93 | ci->i_layout.stripe_unit, ci->i_layout.stripe_count, |
101 | ci->i_layout.object_size, (unsigned long long)pool); | 94 | ci->i_layout.object_size, (unsigned long long)pool); |
102 | if (size) { | 95 | total_len = len; |
103 | if (ret <= size) | 96 | } |
104 | memcpy(val, buf, ret); | 97 | |
105 | else | 98 | if (pool_ns) |
106 | ret = -ERANGE; | 99 | total_len += strlen(ns_field) + pool_ns->len; |
100 | |||
101 | if (!size) { | ||
102 | ret = total_len; | ||
103 | } else if (total_len > size) { | ||
104 | ret = -ERANGE; | ||
105 | } else { | ||
106 | memcpy(val, buf, len); | ||
107 | ret = len; | ||
108 | if (pool_name) { | ||
109 | len = strlen(pool_name); | ||
110 | memcpy(val + ret, pool_name, len); | ||
111 | ret += len; | ||
112 | } | ||
113 | if (pool_ns) { | ||
114 | len = strlen(ns_field); | ||
115 | memcpy(val + ret, ns_field, len); | ||
116 | ret += len; | ||
117 | memcpy(val + ret, pool_ns->str, pool_ns->len); | ||
118 | ret += pool_ns->len; | ||
107 | } | 119 | } |
108 | } | 120 | } |
109 | up_read(&osdc->lock); | 121 | up_read(&osdc->lock); |
122 | ceph_put_string(pool_ns); | ||
110 | return ret; | 123 | return ret; |
111 | } | 124 | } |
112 | 125 | ||
@@ -147,6 +160,18 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, | |||
147 | return ret; | 160 | return ret; |
148 | } | 161 | } |
149 | 162 | ||
163 | static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci, | ||
164 | char *val, size_t size) | ||
165 | { | ||
166 | int ret = 0; | ||
167 | struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns); | ||
168 | if (ns) { | ||
169 | ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str); | ||
170 | ceph_put_string(ns); | ||
171 | } | ||
172 | return ret; | ||
173 | } | ||
174 | |||
150 | /* directories */ | 175 | /* directories */ |
151 | 176 | ||
152 | static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, | 177 | static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, |
@@ -235,6 +260,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { | |||
235 | XATTR_LAYOUT_FIELD(dir, layout, stripe_count), | 260 | XATTR_LAYOUT_FIELD(dir, layout, stripe_count), |
236 | XATTR_LAYOUT_FIELD(dir, layout, object_size), | 261 | XATTR_LAYOUT_FIELD(dir, layout, object_size), |
237 | XATTR_LAYOUT_FIELD(dir, layout, pool), | 262 | XATTR_LAYOUT_FIELD(dir, layout, pool), |
263 | XATTR_LAYOUT_FIELD(dir, layout, pool_namespace), | ||
238 | XATTR_NAME_CEPH(dir, entries), | 264 | XATTR_NAME_CEPH(dir, entries), |
239 | XATTR_NAME_CEPH(dir, files), | 265 | XATTR_NAME_CEPH(dir, files), |
240 | XATTR_NAME_CEPH(dir, subdirs), | 266 | XATTR_NAME_CEPH(dir, subdirs), |
@@ -262,6 +288,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { | |||
262 | XATTR_LAYOUT_FIELD(file, layout, stripe_count), | 288 | XATTR_LAYOUT_FIELD(file, layout, stripe_count), |
263 | XATTR_LAYOUT_FIELD(file, layout, object_size), | 289 | XATTR_LAYOUT_FIELD(file, layout, object_size), |
264 | XATTR_LAYOUT_FIELD(file, layout, pool), | 290 | XATTR_LAYOUT_FIELD(file, layout, pool), |
291 | XATTR_LAYOUT_FIELD(file, layout, pool_namespace), | ||
265 | { .name = NULL, 0 } /* Required table terminator */ | 292 | { .name = NULL, 0 } /* Required table terminator */ |
266 | }; | 293 | }; |
267 | static size_t ceph_file_vxattrs_name_size; /* total size of all names */ | 294 | static size_t ceph_file_vxattrs_name_size; /* total size of all names */ |