diff options
-rw-r--r-- | drivers/block/rbd.c | 19 | ||||
-rw-r--r-- | fs/ceph/Makefile | 23 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 9 | ||||
-rw-r--r-- | fs/ceph/dir.c | 20 | ||||
-rw-r--r-- | fs/ceph/export.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 4 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 56 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 2 | ||||
-rw-r--r-- | fs/ceph/super.c | 13 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 16 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 5 | ||||
-rw-r--r-- | net/ceph/ceph_hash.c | 3 | ||||
-rw-r--r-- | net/ceph/messenger.c | 46 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 4 |
15 files changed, 116 insertions, 108 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 008d4a00b50d..e1e38b11f48a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -1790,18 +1790,29 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count) | |||
1790 | 1790 | ||
1791 | rc = rbd_bus_add_dev(rbd_dev); | 1791 | rc = rbd_bus_add_dev(rbd_dev); |
1792 | if (rc) | 1792 | if (rc) |
1793 | goto err_out_disk; | 1793 | goto err_out_blkdev; |
1794 | |||
1794 | /* set up and announce blkdev mapping */ | 1795 | /* set up and announce blkdev mapping */ |
1795 | rc = rbd_init_disk(rbd_dev); | 1796 | rc = rbd_init_disk(rbd_dev); |
1796 | if (rc) | 1797 | if (rc) |
1797 | goto err_out_blkdev; | 1798 | goto err_out_bus; |
1798 | 1799 | ||
1799 | return count; | 1800 | return count; |
1800 | 1801 | ||
1802 | err_out_bus: | ||
1803 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1804 | list_del_init(&rbd_dev->node); | ||
1805 | mutex_unlock(&ctl_mutex); | ||
1806 | |||
1807 | /* this will also clean up rest of rbd_dev stuff */ | ||
1808 | |||
1809 | rbd_bus_del_dev(rbd_dev); | ||
1810 | kfree(options); | ||
1811 | kfree(mon_dev_name); | ||
1812 | return rc; | ||
1813 | |||
1801 | err_out_blkdev: | 1814 | err_out_blkdev: |
1802 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 1815 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
1803 | err_out_disk: | ||
1804 | rbd_free_disk(rbd_dev); | ||
1805 | err_out_client: | 1816 | err_out_client: |
1806 | rbd_put_client(rbd_dev); | 1817 | rbd_put_client(rbd_dev); |
1807 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 1818 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 9e6c4f2e8ff1..bd352125e829 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -2,31 +2,10 @@ | |||
2 | # Makefile for CEPH filesystem. | 2 | # Makefile for CEPH filesystem. |
3 | # | 3 | # |
4 | 4 | ||
5 | ifneq ($(KERNELRELEASE),) | ||
6 | |||
7 | obj-$(CONFIG_CEPH_FS) += ceph.o | 5 | obj-$(CONFIG_CEPH_FS) += ceph.o |
8 | 6 | ||
9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 7 | ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
10 | export.o caps.o snap.o xattr.o \ | 8 | export.o caps.o snap.o xattr.o \ |
11 | mds_client.o mdsmap.o strings.o ceph_frag.o \ | 9 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
12 | debugfs.o | 10 | debugfs.o |
13 | 11 | ||
14 | else | ||
15 | #Otherwise we were called directly from the command | ||
16 | # line; invoke the kernel build system. | ||
17 | |||
18 | KERNELDIR ?= /lib/modules/$(shell uname -r)/build | ||
19 | PWD := $(shell pwd) | ||
20 | |||
21 | default: all | ||
22 | |||
23 | all: | ||
24 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules | ||
25 | |||
26 | modules_install: | ||
27 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules_install | ||
28 | |||
29 | clean: | ||
30 | $(MAKE) -C $(KERNELDIR) M=$(PWD) clean | ||
31 | |||
32 | endif | ||
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 7ae1b3d55b58..08f65faac112 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -60,10 +60,13 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
60 | for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) { | 60 | for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) { |
61 | req = rb_entry(rp, struct ceph_mds_request, r_node); | 61 | req = rb_entry(rp, struct ceph_mds_request, r_node); |
62 | 62 | ||
63 | if (req->r_request) | 63 | if (req->r_request && req->r_session) |
64 | seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds); | 64 | seq_printf(s, "%lld\tmds%d\t", req->r_tid, |
65 | else | 65 | req->r_session->s_mds); |
66 | else if (!req->r_request) | ||
66 | seq_printf(s, "%lld\t(no request)\t", req->r_tid); | 67 | seq_printf(s, "%lld\t(no request)\t", req->r_tid); |
68 | else | ||
69 | seq_printf(s, "%lld\t(no session)\t", req->r_tid); | ||
67 | 70 | ||
68 | seq_printf(s, "%s", ceph_mds_op_name(req->r_op)); | 71 | seq_printf(s, "%s", ceph_mds_op_name(req->r_op)); |
69 | 72 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index fa7ca04ee816..0bc68de8edd7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -1224,6 +1224,26 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1224 | } | 1224 | } |
1225 | } | 1225 | } |
1226 | 1226 | ||
1227 | /* | ||
1228 | * Return name hash for a given dentry. This is dependent on | ||
1229 | * the parent directory's hash function. | ||
1230 | */ | ||
1231 | unsigned ceph_dentry_hash(struct dentry *dn) | ||
1232 | { | ||
1233 | struct inode *dir = dn->d_parent->d_inode; | ||
1234 | struct ceph_inode_info *dci = ceph_inode(dir); | ||
1235 | |||
1236 | switch (dci->i_dir_layout.dl_dir_hash) { | ||
1237 | case 0: /* for backward compat */ | ||
1238 | case CEPH_STR_HASH_LINUX: | ||
1239 | return dn->d_name.hash; | ||
1240 | |||
1241 | default: | ||
1242 | return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, | ||
1243 | dn->d_name.name, dn->d_name.len); | ||
1244 | } | ||
1245 | } | ||
1246 | |||
1227 | const struct file_operations ceph_dir_fops = { | 1247 | const struct file_operations ceph_dir_fops = { |
1228 | .read = ceph_read_dir, | 1248 | .read = ceph_read_dir, |
1229 | .readdir = ceph_readdir, | 1249 | .readdir = ceph_readdir, |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 2297d9426992..e41056174bf8 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -59,7 +59,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
59 | dout("encode_fh %p connectable\n", dentry); | 59 | dout("encode_fh %p connectable\n", dentry); |
60 | cfh->ino = ceph_ino(dentry->d_inode); | 60 | cfh->ino = ceph_ino(dentry->d_inode); |
61 | cfh->parent_ino = ceph_ino(parent->d_inode); | 61 | cfh->parent_ino = ceph_ino(parent->d_inode); |
62 | cfh->parent_name_hash = parent->d_name.hash; | 62 | cfh->parent_name_hash = ceph_dentry_hash(parent); |
63 | *max_len = connected_handle_length; | 63 | *max_len = connected_handle_length; |
64 | type = 2; | 64 | type = 2; |
65 | } else if (*max_len >= handle_length) { | 65 | } else if (*max_len >= handle_length) { |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e61de4f7b99d..e835eff551e3 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
297 | ci->i_release_count = 0; | 297 | ci->i_release_count = 0; |
298 | ci->i_symlink = NULL; | 298 | ci->i_symlink = NULL; |
299 | 299 | ||
300 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); | ||
301 | |||
300 | ci->i_fragtree = RB_ROOT; | 302 | ci->i_fragtree = RB_ROOT; |
301 | mutex_init(&ci->i_fragtree_mutex); | 303 | mutex_init(&ci->i_fragtree_mutex); |
302 | 304 | ||
@@ -689,6 +691,8 @@ static int fill_inode(struct inode *inode, | |||
689 | inode->i_op = &ceph_dir_iops; | 691 | inode->i_op = &ceph_dir_iops; |
690 | inode->i_fop = &ceph_dir_fops; | 692 | inode->i_fop = &ceph_dir_fops; |
691 | 693 | ||
694 | ci->i_dir_layout = iinfo->dir_layout; | ||
695 | |||
692 | ci->i_files = le64_to_cpu(info->files); | 696 | ci->i_files = le64_to_cpu(info->files); |
693 | ci->i_subdirs = le64_to_cpu(info->subdirs); | 697 | ci->i_subdirs = le64_to_cpu(info->subdirs); |
694 | ci->i_rbytes = le64_to_cpu(info->rbytes); | 698 | ci->i_rbytes = le64_to_cpu(info->rbytes); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a50fca1e03be..1e30d194a8e3 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -60,7 +60,8 @@ static const struct ceph_connection_operations mds_con_ops; | |||
60 | * parse individual inode info | 60 | * parse individual inode info |
61 | */ | 61 | */ |
62 | static int parse_reply_info_in(void **p, void *end, | 62 | static int parse_reply_info_in(void **p, void *end, |
63 | struct ceph_mds_reply_info_in *info) | 63 | struct ceph_mds_reply_info_in *info, |
64 | int features) | ||
64 | { | 65 | { |
65 | int err = -EIO; | 66 | int err = -EIO; |
66 | 67 | ||
@@ -74,6 +75,12 @@ static int parse_reply_info_in(void **p, void *end, | |||
74 | info->symlink = *p; | 75 | info->symlink = *p; |
75 | *p += info->symlink_len; | 76 | *p += info->symlink_len; |
76 | 77 | ||
78 | if (features & CEPH_FEATURE_DIRLAYOUTHASH) | ||
79 | ceph_decode_copy_safe(p, end, &info->dir_layout, | ||
80 | sizeof(info->dir_layout), bad); | ||
81 | else | ||
82 | memset(&info->dir_layout, 0, sizeof(info->dir_layout)); | ||
83 | |||
77 | ceph_decode_32_safe(p, end, info->xattr_len, bad); | 84 | ceph_decode_32_safe(p, end, info->xattr_len, bad); |
78 | ceph_decode_need(p, end, info->xattr_len, bad); | 85 | ceph_decode_need(p, end, info->xattr_len, bad); |
79 | info->xattr_data = *p; | 86 | info->xattr_data = *p; |
@@ -88,12 +95,13 @@ bad: | |||
88 | * target inode. | 95 | * target inode. |
89 | */ | 96 | */ |
90 | static int parse_reply_info_trace(void **p, void *end, | 97 | static int parse_reply_info_trace(void **p, void *end, |
91 | struct ceph_mds_reply_info_parsed *info) | 98 | struct ceph_mds_reply_info_parsed *info, |
99 | int features) | ||
92 | { | 100 | { |
93 | int err; | 101 | int err; |
94 | 102 | ||
95 | if (info->head->is_dentry) { | 103 | if (info->head->is_dentry) { |
96 | err = parse_reply_info_in(p, end, &info->diri); | 104 | err = parse_reply_info_in(p, end, &info->diri, features); |
97 | if (err < 0) | 105 | if (err < 0) |
98 | goto out_bad; | 106 | goto out_bad; |
99 | 107 | ||
@@ -114,7 +122,7 @@ static int parse_reply_info_trace(void **p, void *end, | |||
114 | } | 122 | } |
115 | 123 | ||
116 | if (info->head->is_target) { | 124 | if (info->head->is_target) { |
117 | err = parse_reply_info_in(p, end, &info->targeti); | 125 | err = parse_reply_info_in(p, end, &info->targeti, features); |
118 | if (err < 0) | 126 | if (err < 0) |
119 | goto out_bad; | 127 | goto out_bad; |
120 | } | 128 | } |
@@ -134,7 +142,8 @@ out_bad: | |||
134 | * parse readdir results | 142 | * parse readdir results |
135 | */ | 143 | */ |
136 | static int parse_reply_info_dir(void **p, void *end, | 144 | static int parse_reply_info_dir(void **p, void *end, |
137 | struct ceph_mds_reply_info_parsed *info) | 145 | struct ceph_mds_reply_info_parsed *info, |
146 | int features) | ||
138 | { | 147 | { |
139 | u32 num, i = 0; | 148 | u32 num, i = 0; |
140 | int err; | 149 | int err; |
@@ -182,7 +191,7 @@ static int parse_reply_info_dir(void **p, void *end, | |||
182 | *p += sizeof(struct ceph_mds_reply_lease); | 191 | *p += sizeof(struct ceph_mds_reply_lease); |
183 | 192 | ||
184 | /* inode */ | 193 | /* inode */ |
185 | err = parse_reply_info_in(p, end, &info->dir_in[i]); | 194 | err = parse_reply_info_in(p, end, &info->dir_in[i], features); |
186 | if (err < 0) | 195 | if (err < 0) |
187 | goto out_bad; | 196 | goto out_bad; |
188 | i++; | 197 | i++; |
@@ -205,7 +214,8 @@ out_bad: | |||
205 | * parse fcntl F_GETLK results | 214 | * parse fcntl F_GETLK results |
206 | */ | 215 | */ |
207 | static int parse_reply_info_filelock(void **p, void *end, | 216 | static int parse_reply_info_filelock(void **p, void *end, |
208 | struct ceph_mds_reply_info_parsed *info) | 217 | struct ceph_mds_reply_info_parsed *info, |
218 | int features) | ||
209 | { | 219 | { |
210 | if (*p + sizeof(*info->filelock_reply) > end) | 220 | if (*p + sizeof(*info->filelock_reply) > end) |
211 | goto bad; | 221 | goto bad; |
@@ -225,19 +235,21 @@ bad: | |||
225 | * parse extra results | 235 | * parse extra results |
226 | */ | 236 | */ |
227 | static int parse_reply_info_extra(void **p, void *end, | 237 | static int parse_reply_info_extra(void **p, void *end, |
228 | struct ceph_mds_reply_info_parsed *info) | 238 | struct ceph_mds_reply_info_parsed *info, |
239 | int features) | ||
229 | { | 240 | { |
230 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) | 241 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) |
231 | return parse_reply_info_filelock(p, end, info); | 242 | return parse_reply_info_filelock(p, end, info, features); |
232 | else | 243 | else |
233 | return parse_reply_info_dir(p, end, info); | 244 | return parse_reply_info_dir(p, end, info, features); |
234 | } | 245 | } |
235 | 246 | ||
236 | /* | 247 | /* |
237 | * parse entire mds reply | 248 | * parse entire mds reply |
238 | */ | 249 | */ |
239 | static int parse_reply_info(struct ceph_msg *msg, | 250 | static int parse_reply_info(struct ceph_msg *msg, |
240 | struct ceph_mds_reply_info_parsed *info) | 251 | struct ceph_mds_reply_info_parsed *info, |
252 | int features) | ||
241 | { | 253 | { |
242 | void *p, *end; | 254 | void *p, *end; |
243 | u32 len; | 255 | u32 len; |
@@ -250,7 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
250 | /* trace */ | 262 | /* trace */ |
251 | ceph_decode_32_safe(&p, end, len, bad); | 263 | ceph_decode_32_safe(&p, end, len, bad); |
252 | if (len > 0) { | 264 | if (len > 0) { |
253 | err = parse_reply_info_trace(&p, p+len, info); | 265 | err = parse_reply_info_trace(&p, p+len, info, features); |
254 | if (err < 0) | 266 | if (err < 0) |
255 | goto out_bad; | 267 | goto out_bad; |
256 | } | 268 | } |
@@ -258,7 +270,7 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
258 | /* extra */ | 270 | /* extra */ |
259 | ceph_decode_32_safe(&p, end, len, bad); | 271 | ceph_decode_32_safe(&p, end, len, bad); |
260 | if (len > 0) { | 272 | if (len > 0) { |
261 | err = parse_reply_info_extra(&p, p+len, info); | 273 | err = parse_reply_info_extra(&p, p+len, info, features); |
262 | if (err < 0) | 274 | if (err < 0) |
263 | goto out_bad; | 275 | goto out_bad; |
264 | } | 276 | } |
@@ -654,7 +666,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
654 | } else { | 666 | } else { |
655 | /* dir + name */ | 667 | /* dir + name */ |
656 | inode = dir; | 668 | inode = dir; |
657 | hash = req->r_dentry->d_name.hash; | 669 | hash = ceph_dentry_hash(req->r_dentry); |
658 | is_hash = true; | 670 | is_hash = true; |
659 | } | 671 | } |
660 | } | 672 | } |
@@ -1693,7 +1705,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1693 | struct ceph_msg *msg; | 1705 | struct ceph_msg *msg; |
1694 | int flags = 0; | 1706 | int flags = 0; |
1695 | 1707 | ||
1696 | req->r_mds = mds; | ||
1697 | req->r_attempts++; | 1708 | req->r_attempts++; |
1698 | if (req->r_inode) { | 1709 | if (req->r_inode) { |
1699 | struct ceph_cap *cap = | 1710 | struct ceph_cap *cap = |
@@ -1780,6 +1791,8 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1780 | goto finish; | 1791 | goto finish; |
1781 | } | 1792 | } |
1782 | 1793 | ||
1794 | put_request_session(req); | ||
1795 | |||
1783 | mds = __choose_mds(mdsc, req); | 1796 | mds = __choose_mds(mdsc, req); |
1784 | if (mds < 0 || | 1797 | if (mds < 0 || |
1785 | ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { | 1798 | ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { |
@@ -1797,6 +1810,8 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1797 | goto finish; | 1810 | goto finish; |
1798 | } | 1811 | } |
1799 | } | 1812 | } |
1813 | req->r_session = get_session(session); | ||
1814 | |||
1800 | dout("do_request mds%d session %p state %s\n", mds, session, | 1815 | dout("do_request mds%d session %p state %s\n", mds, session, |
1801 | session_state_name(session->s_state)); | 1816 | session_state_name(session->s_state)); |
1802 | if (session->s_state != CEPH_MDS_SESSION_OPEN && | 1817 | if (session->s_state != CEPH_MDS_SESSION_OPEN && |
@@ -1809,7 +1824,6 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1809 | } | 1824 | } |
1810 | 1825 | ||
1811 | /* send request */ | 1826 | /* send request */ |
1812 | req->r_session = get_session(session); | ||
1813 | req->r_resend_mds = -1; /* forget any previous mds hint */ | 1827 | req->r_resend_mds = -1; /* forget any previous mds hint */ |
1814 | 1828 | ||
1815 | if (req->r_request_started == 0) /* note request start time */ | 1829 | if (req->r_request_started == 0) /* note request start time */ |
@@ -1863,7 +1877,6 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) | |||
1863 | if (req->r_session && | 1877 | if (req->r_session && |
1864 | req->r_session->s_mds == mds) { | 1878 | req->r_session->s_mds == mds) { |
1865 | dout(" kicking tid %llu\n", req->r_tid); | 1879 | dout(" kicking tid %llu\n", req->r_tid); |
1866 | put_request_session(req); | ||
1867 | __do_request(mdsc, req); | 1880 | __do_request(mdsc, req); |
1868 | } | 1881 | } |
1869 | } | 1882 | } |
@@ -2056,8 +2069,11 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2056 | goto out; | 2069 | goto out; |
2057 | } else { | 2070 | } else { |
2058 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); | 2071 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); |
2059 | struct ceph_cap *cap = | 2072 | struct ceph_cap *cap = NULL; |
2060 | ceph_get_cap_for_mds(ci, req->r_mds);; | 2073 | |
2074 | if (req->r_session) | ||
2075 | cap = ceph_get_cap_for_mds(ci, | ||
2076 | req->r_session->s_mds); | ||
2061 | 2077 | ||
2062 | dout("already using auth"); | 2078 | dout("already using auth"); |
2063 | if ((!cap || cap != ci->i_auth_cap) || | 2079 | if ((!cap || cap != ci->i_auth_cap) || |
@@ -2101,7 +2117,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2101 | 2117 | ||
2102 | dout("handle_reply tid %lld result %d\n", tid, result); | 2118 | dout("handle_reply tid %lld result %d\n", tid, result); |
2103 | rinfo = &req->r_reply_info; | 2119 | rinfo = &req->r_reply_info; |
2104 | err = parse_reply_info(msg, rinfo); | 2120 | err = parse_reply_info(msg, rinfo, session->s_con.peer_features); |
2105 | mutex_unlock(&mdsc->mutex); | 2121 | mutex_unlock(&mdsc->mutex); |
2106 | 2122 | ||
2107 | mutex_lock(&session->s_mutex); | 2123 | mutex_lock(&session->s_mutex); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index aabe563b54db..4e3a9cc0bba6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -35,6 +35,7 @@ struct ceph_cap; | |||
35 | */ | 35 | */ |
36 | struct ceph_mds_reply_info_in { | 36 | struct ceph_mds_reply_info_in { |
37 | struct ceph_mds_reply_inode *in; | 37 | struct ceph_mds_reply_inode *in; |
38 | struct ceph_dir_layout dir_layout; | ||
38 | u32 symlink_len; | 39 | u32 symlink_len; |
39 | char *symlink; | 40 | char *symlink; |
40 | u32 xattr_len; | 41 | u32 xattr_len; |
@@ -165,7 +166,6 @@ struct ceph_mds_request { | |||
165 | struct ceph_mds_client *r_mdsc; | 166 | struct ceph_mds_client *r_mdsc; |
166 | 167 | ||
167 | int r_op; /* mds op code */ | 168 | int r_op; /* mds op code */ |
168 | int r_mds; | ||
169 | 169 | ||
170 | /* operation on what? */ | 170 | /* operation on what? */ |
171 | struct inode *r_inode; /* arg1 */ | 171 | struct inode *r_inode; /* arg1 */ |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 08b460ae0539..bf6f0f34082a 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -428,7 +428,8 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | |||
428 | goto fail; | 428 | goto fail; |
429 | } | 429 | } |
430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; | 430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; |
431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK; | 431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK | |
432 | CEPH_FEATURE_DIRLAYOUTHASH; | ||
432 | fsc->client->monc.want_mdsmap = 1; | 433 | fsc->client->monc.want_mdsmap = 1; |
433 | 434 | ||
434 | fsc->mount_options = fsopt; | 435 | fsc->mount_options = fsopt; |
@@ -443,13 +444,17 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | |||
443 | goto fail_client; | 444 | goto fail_client; |
444 | 445 | ||
445 | err = -ENOMEM; | 446 | err = -ENOMEM; |
446 | fsc->wb_wq = create_workqueue("ceph-writeback"); | 447 | /* |
448 | * The number of concurrent works can be high but they don't need | ||
449 | * to be processed in parallel, limit concurrency. | ||
450 | */ | ||
451 | fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); | ||
447 | if (fsc->wb_wq == NULL) | 452 | if (fsc->wb_wq == NULL) |
448 | goto fail_bdi; | 453 | goto fail_bdi; |
449 | fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); | 454 | fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); |
450 | if (fsc->pg_inv_wq == NULL) | 455 | if (fsc->pg_inv_wq == NULL) |
451 | goto fail_wb_wq; | 456 | goto fail_wb_wq; |
452 | fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc"); | 457 | fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); |
453 | if (fsc->trunc_wq == NULL) | 458 | if (fsc->trunc_wq == NULL) |
454 | goto fail_pg_inv_wq; | 459 | goto fail_pg_inv_wq; |
455 | 460 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 4553d8829edb..20b907d76ae2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -239,6 +239,7 @@ struct ceph_inode_info { | |||
239 | unsigned i_ceph_flags; | 239 | unsigned i_ceph_flags; |
240 | unsigned long i_release_count; | 240 | unsigned long i_release_count; |
241 | 241 | ||
242 | struct ceph_dir_layout i_dir_layout; | ||
242 | struct ceph_file_layout i_layout; | 243 | struct ceph_file_layout i_layout; |
243 | char *i_symlink; | 244 | char *i_symlink; |
244 | 245 | ||
@@ -768,6 +769,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn); | |||
768 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 769 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
769 | extern void ceph_dentry_lru_del(struct dentry *dn); | 770 | extern void ceph_dentry_lru_del(struct dentry *dn); |
770 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | 771 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); |
772 | extern unsigned ceph_dentry_hash(struct dentry *dn); | ||
771 | 773 | ||
772 | /* | 774 | /* |
773 | * our d_ops vary depending on whether the inode is live, | 775 | * our d_ops vary depending on whether the inode is live, |
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c3c74aef289d..09dcc0c2ffd5 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
@@ -43,6 +43,10 @@ | |||
43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) | 43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) |
44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) | 44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) |
45 | #define CEPH_FEATURE_FLOCK (1<<3) | 45 | #define CEPH_FEATURE_FLOCK (1<<3) |
46 | #define CEPH_FEATURE_SUBSCRIBE2 (1<<4) | ||
47 | #define CEPH_FEATURE_MONNAMES (1<<5) | ||
48 | #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) | ||
49 | #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) | ||
46 | 50 | ||
47 | 51 | ||
48 | /* | 52 | /* |
@@ -55,10 +59,10 @@ struct ceph_file_layout { | |||
55 | __le32 fl_stripe_count; /* over this many objects */ | 59 | __le32 fl_stripe_count; /* over this many objects */ |
56 | __le32 fl_object_size; /* until objects are this big, then move to | 60 | __le32 fl_object_size; /* until objects are this big, then move to |
57 | new objects */ | 61 | new objects */ |
58 | __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ | 62 | __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */ |
59 | 63 | ||
60 | /* pg -> disk layout */ | 64 | /* pg -> disk layout */ |
61 | __le32 fl_object_stripe_unit; /* for per-object parity, if any */ | 65 | __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ |
62 | 66 | ||
63 | /* object -> pg layout */ | 67 | /* object -> pg layout */ |
64 | __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ | 68 | __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ |
@@ -69,6 +73,12 @@ struct ceph_file_layout { | |||
69 | 73 | ||
70 | int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | 74 | int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); |
71 | 75 | ||
76 | struct ceph_dir_layout { | ||
77 | __u8 dl_dir_hash; /* see ceph_hash.h for ids */ | ||
78 | __u8 dl_unused1; | ||
79 | __u16 dl_unused2; | ||
80 | __u32 dl_unused3; | ||
81 | } __attribute__ ((packed)); | ||
72 | 82 | ||
73 | /* crypto algorithms */ | 83 | /* crypto algorithms */ |
74 | #define CEPH_CRYPTO_NONE 0x0 | 84 | #define CEPH_CRYPTO_NONE 0x0 |
@@ -457,7 +467,7 @@ struct ceph_mds_reply_inode { | |||
457 | struct ceph_timespec rctime; | 467 | struct ceph_timespec rctime; |
458 | struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ | 468 | struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ |
459 | } __attribute__ ((packed)); | 469 | } __attribute__ ((packed)); |
460 | /* followed by frag array, then symlink string, then xattr blob */ | 470 | /* followed by frag array, symlink string, dir layout, xattr blob */ |
461 | 471 | ||
462 | /* reply_lease follows dname, and reply_inode */ | 472 | /* reply_lease follows dname, and reply_inode */ |
463 | struct ceph_mds_reply_lease { | 473 | struct ceph_mds_reply_lease { |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index a108b425fee2..c3011beac30d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -110,17 +110,12 @@ struct ceph_msg_pos { | |||
110 | 110 | ||
111 | /* | 111 | /* |
112 | * ceph_connection state bit flags | 112 | * ceph_connection state bit flags |
113 | * | ||
114 | * QUEUED and BUSY are used together to ensure that only a single | ||
115 | * thread is currently opening, reading or writing data to the socket. | ||
116 | */ | 113 | */ |
117 | #define LOSSYTX 0 /* we can close channel or drop messages on errors */ | 114 | #define LOSSYTX 0 /* we can close channel or drop messages on errors */ |
118 | #define CONNECTING 1 | 115 | #define CONNECTING 1 |
119 | #define NEGOTIATING 2 | 116 | #define NEGOTIATING 2 |
120 | #define KEEPALIVE_PENDING 3 | 117 | #define KEEPALIVE_PENDING 3 |
121 | #define WRITE_PENDING 4 /* we have data ready to send */ | 118 | #define WRITE_PENDING 4 /* we have data ready to send */ |
122 | #define QUEUED 5 /* there is work queued on this connection */ | ||
123 | #define BUSY 6 /* work is being done */ | ||
124 | #define STANDBY 8 /* no outgoing messages, socket closed. we keep | 119 | #define STANDBY 8 /* no outgoing messages, socket closed. we keep |
125 | * the ceph_connection around to maintain shared | 120 | * the ceph_connection around to maintain shared |
126 | * state with the peer. */ | 121 | * state with the peer. */ |
diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 815ef8826796..0a1b53bce76d 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c | |||
@@ -1,5 +1,6 @@ | |||
1 | 1 | ||
2 | #include <linux/ceph/types.h> | 2 | #include <linux/ceph/types.h> |
3 | #include <linux/module.h> | ||
3 | 4 | ||
4 | /* | 5 | /* |
5 | * Robert Jenkin's hash function. | 6 | * Robert Jenkin's hash function. |
@@ -104,6 +105,7 @@ unsigned ceph_str_hash(int type, const char *s, unsigned len) | |||
104 | return -1; | 105 | return -1; |
105 | } | 106 | } |
106 | } | 107 | } |
108 | EXPORT_SYMBOL(ceph_str_hash); | ||
107 | 109 | ||
108 | const char *ceph_str_hash_name(int type) | 110 | const char *ceph_str_hash_name(int type) |
109 | { | 111 | { |
@@ -116,3 +118,4 @@ const char *ceph_str_hash_name(int type) | |||
116 | return "unknown"; | 118 | return "unknown"; |
117 | } | 119 | } |
118 | } | 120 | } |
121 | EXPORT_SYMBOL(ceph_str_hash_name); | ||
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b6ff4a1519ab..dff633d62e5b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -96,7 +96,7 @@ struct workqueue_struct *ceph_msgr_wq; | |||
96 | 96 | ||
97 | int ceph_msgr_init(void) | 97 | int ceph_msgr_init(void) |
98 | { | 98 | { |
99 | ceph_msgr_wq = create_workqueue("ceph-msgr"); | 99 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); |
100 | if (!ceph_msgr_wq) { | 100 | if (!ceph_msgr_wq) { |
101 | pr_err("msgr_init failed to create workqueue\n"); | 101 | pr_err("msgr_init failed to create workqueue\n"); |
102 | return -ENOMEM; | 102 | return -ENOMEM; |
@@ -1920,20 +1920,6 @@ bad_tag: | |||
1920 | /* | 1920 | /* |
1921 | * Atomically queue work on a connection. Bump @con reference to | 1921 | * Atomically queue work on a connection. Bump @con reference to |
1922 | * avoid races with connection teardown. | 1922 | * avoid races with connection teardown. |
1923 | * | ||
1924 | * There is some trickery going on with QUEUED and BUSY because we | ||
1925 | * only want a _single_ thread operating on each connection at any | ||
1926 | * point in time, but we want to use all available CPUs. | ||
1927 | * | ||
1928 | * The worker thread only proceeds if it can atomically set BUSY. It | ||
1929 | * clears QUEUED and does it's thing. When it thinks it's done, it | ||
1930 | * clears BUSY, then rechecks QUEUED.. if it's set again, it loops | ||
1931 | * (tries again to set BUSY). | ||
1932 | * | ||
1933 | * To queue work, we first set QUEUED, _then_ if BUSY isn't set, we | ||
1934 | * try to queue work. If that fails (work is already queued, or BUSY) | ||
1935 | * we give up (work also already being done or is queued) but leave QUEUED | ||
1936 | * set so that the worker thread will loop if necessary. | ||
1937 | */ | 1923 | */ |
1938 | static void queue_con(struct ceph_connection *con) | 1924 | static void queue_con(struct ceph_connection *con) |
1939 | { | 1925 | { |
@@ -1948,11 +1934,7 @@ static void queue_con(struct ceph_connection *con) | |||
1948 | return; | 1934 | return; |
1949 | } | 1935 | } |
1950 | 1936 | ||
1951 | set_bit(QUEUED, &con->state); | 1937 | if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { |
1952 | if (test_bit(BUSY, &con->state)) { | ||
1953 | dout("queue_con %p - already BUSY\n", con); | ||
1954 | con->ops->put(con); | ||
1955 | } else if (!queue_work(ceph_msgr_wq, &con->work.work)) { | ||
1956 | dout("queue_con %p - already queued\n", con); | 1938 | dout("queue_con %p - already queued\n", con); |
1957 | con->ops->put(con); | 1939 | con->ops->put(con); |
1958 | } else { | 1940 | } else { |
@@ -1967,15 +1949,6 @@ static void con_work(struct work_struct *work) | |||
1967 | { | 1949 | { |
1968 | struct ceph_connection *con = container_of(work, struct ceph_connection, | 1950 | struct ceph_connection *con = container_of(work, struct ceph_connection, |
1969 | work.work); | 1951 | work.work); |
1970 | int backoff = 0; | ||
1971 | |||
1972 | more: | ||
1973 | if (test_and_set_bit(BUSY, &con->state) != 0) { | ||
1974 | dout("con_work %p BUSY already set\n", con); | ||
1975 | goto out; | ||
1976 | } | ||
1977 | dout("con_work %p start, clearing QUEUED\n", con); | ||
1978 | clear_bit(QUEUED, &con->state); | ||
1979 | 1952 | ||
1980 | mutex_lock(&con->mutex); | 1953 | mutex_lock(&con->mutex); |
1981 | 1954 | ||
@@ -1994,28 +1967,13 @@ more: | |||
1994 | try_read(con) < 0 || | 1967 | try_read(con) < 0 || |
1995 | try_write(con) < 0) { | 1968 | try_write(con) < 0) { |
1996 | mutex_unlock(&con->mutex); | 1969 | mutex_unlock(&con->mutex); |
1997 | backoff = 1; | ||
1998 | ceph_fault(con); /* error/fault path */ | 1970 | ceph_fault(con); /* error/fault path */ |
1999 | goto done_unlocked; | 1971 | goto done_unlocked; |
2000 | } | 1972 | } |
2001 | 1973 | ||
2002 | done: | 1974 | done: |
2003 | mutex_unlock(&con->mutex); | 1975 | mutex_unlock(&con->mutex); |
2004 | |||
2005 | done_unlocked: | 1976 | done_unlocked: |
2006 | clear_bit(BUSY, &con->state); | ||
2007 | dout("con->state=%lu\n", con->state); | ||
2008 | if (test_bit(QUEUED, &con->state)) { | ||
2009 | if (!backoff || test_bit(OPENING, &con->state)) { | ||
2010 | dout("con_work %p QUEUED reset, looping\n", con); | ||
2011 | goto more; | ||
2012 | } | ||
2013 | dout("con_work %p QUEUED reset, but just faulted\n", con); | ||
2014 | clear_bit(QUEUED, &con->state); | ||
2015 | } | ||
2016 | dout("con_work %p done\n", con); | ||
2017 | |||
2018 | out: | ||
2019 | con->ops->put(con); | 1977 | con->ops->put(con); |
2020 | } | 1978 | } |
2021 | 1979 | ||
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index d73f3f6efa36..71603ac3dff5 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -605,8 +605,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
605 | goto bad; | 605 | goto bad; |
606 | } | 606 | } |
607 | err = __decode_pool(p, end, pi); | 607 | err = __decode_pool(p, end, pi); |
608 | if (err < 0) | 608 | if (err < 0) { |
609 | kfree(pi); | ||
609 | goto bad; | 610 | goto bad; |
611 | } | ||
610 | __insert_pg_pool(&map->pg_pools, pi); | 612 | __insert_pg_pool(&map->pg_pools, pi); |
611 | } | 613 | } |
612 | 614 | ||