diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-05 13:43:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-05 13:43:22 -0500 |
commit | fb62c00a6d8942775abc23d1621db1252e2d93d1 (patch) | |
tree | ef8760123f5a2b692126ecb7a70f2689053885c1 | |
parent | 5c4b4be3b6b937256103a5ae49177e0c3a17cb8f (diff) | |
parent | 455cec0abff563574cca432ced49f734117ca113 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: no .snap inside of snapped namespace
libceph: fix msgr standby handling
libceph: fix msgr keepalive flag
libceph: fix msgr backoff
libceph: retry after authorization failure
libceph: fix handling of short returns from get_user_pages
ceph: do not clear I_COMPLETE from d_release
ceph: do not set I_COMPLETE
Revert "ceph: keep reference to parent inode on ceph_dentry"
-rw-r--r-- | fs/ceph/dir.c | 28 | ||||
-rw-r--r-- | fs/ceph/inode.c | 2 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 2 | ||||
-rw-r--r-- | net/ceph/messenger.c | 71 | ||||
-rw-r--r-- | net/ceph/pagevec.c | 18 |
6 files changed, 72 insertions, 50 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f0aef787a102..099a58615b90 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry) | |||
60 | } | 60 | } |
61 | di->dentry = dentry; | 61 | di->dentry = dentry; |
62 | di->lease_session = NULL; | 62 | di->lease_session = NULL; |
63 | di->parent_inode = igrab(dentry->d_parent->d_inode); | ||
64 | dentry->d_fsdata = di; | 63 | dentry->d_fsdata = di; |
65 | dentry->d_time = jiffies; | 64 | dentry->d_time = jiffies; |
66 | ceph_dentry_lru_add(dentry); | 65 | ceph_dentry_lru_add(dentry); |
@@ -410,7 +409,7 @@ more: | |||
410 | spin_lock(&inode->i_lock); | 409 | spin_lock(&inode->i_lock); |
411 | if (ci->i_release_count == fi->dir_release_count) { | 410 | if (ci->i_release_count == fi->dir_release_count) { |
412 | dout(" marking %p complete\n", inode); | 411 | dout(" marking %p complete\n", inode); |
413 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 412 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ |
414 | ci->i_max_offset = filp->f_pos; | 413 | ci->i_max_offset = filp->f_pos; |
415 | } | 414 | } |
416 | spin_unlock(&inode->i_lock); | 415 | spin_unlock(&inode->i_lock); |
@@ -497,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
497 | 496 | ||
498 | /* .snap dir? */ | 497 | /* .snap dir? */ |
499 | if (err == -ENOENT && | 498 | if (err == -ENOENT && |
499 | ceph_snap(parent) == CEPH_NOSNAP && | ||
500 | strcmp(dentry->d_name.name, | 500 | strcmp(dentry->d_name.name, |
501 | fsc->mount_options->snapdir_name) == 0) { | 501 | fsc->mount_options->snapdir_name) == 0) { |
502 | struct inode *inode = ceph_get_snapdir(parent); | 502 | struct inode *inode = ceph_get_snapdir(parent); |
@@ -1030,28 +1030,8 @@ out_touch: | |||
1030 | static void ceph_dentry_release(struct dentry *dentry) | 1030 | static void ceph_dentry_release(struct dentry *dentry) |
1031 | { | 1031 | { |
1032 | struct ceph_dentry_info *di = ceph_dentry(dentry); | 1032 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
1033 | struct inode *parent_inode = NULL; | ||
1034 | u64 snapid = CEPH_NOSNAP; | ||
1035 | 1033 | ||
1036 | if (!IS_ROOT(dentry)) { | 1034 | dout("dentry_release %p\n", dentry); |
1037 | parent_inode = di->parent_inode; | ||
1038 | if (parent_inode) | ||
1039 | snapid = ceph_snap(parent_inode); | ||
1040 | } | ||
1041 | dout("dentry_release %p parent %p\n", dentry, parent_inode); | ||
1042 | if (parent_inode && snapid != CEPH_SNAPDIR) { | ||
1043 | struct ceph_inode_info *ci = ceph_inode(parent_inode); | ||
1044 | |||
1045 | spin_lock(&parent_inode->i_lock); | ||
1046 | if (ci->i_shared_gen == di->lease_shared_gen || | ||
1047 | snapid <= CEPH_MAXSNAP) { | ||
1048 | dout(" clearing %p complete (d_release)\n", | ||
1049 | parent_inode); | ||
1050 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1051 | ci->i_release_count++; | ||
1052 | } | ||
1053 | spin_unlock(&parent_inode->i_lock); | ||
1054 | } | ||
1055 | if (di) { | 1035 | if (di) { |
1056 | ceph_dentry_lru_del(dentry); | 1036 | ceph_dentry_lru_del(dentry); |
1057 | if (di->lease_session) | 1037 | if (di->lease_session) |
@@ -1059,8 +1039,6 @@ static void ceph_dentry_release(struct dentry *dentry) | |||
1059 | kmem_cache_free(ceph_dentry_cachep, di); | 1039 | kmem_cache_free(ceph_dentry_cachep, di); |
1060 | dentry->d_fsdata = NULL; | 1040 | dentry->d_fsdata = NULL; |
1061 | } | 1041 | } |
1062 | if (parent_inode) | ||
1063 | iput(parent_inode); | ||
1064 | } | 1042 | } |
1065 | 1043 | ||
1066 | static int ceph_snapdir_d_revalidate(struct dentry *dentry, | 1044 | static int ceph_snapdir_d_revalidate(struct dentry *dentry, |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5625463aa479..193bfa5e9cbd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode, | |||
707 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | 707 | (issued & CEPH_CAP_FILE_EXCL) == 0 && |
708 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | 708 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { |
709 | dout(" marking %p complete (empty)\n", inode); | 709 | dout(" marking %p complete (empty)\n", inode); |
710 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 710 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ |
711 | ci->i_max_offset = 2; | 711 | ci->i_max_offset = 2; |
712 | } | 712 | } |
713 | break; | 713 | break; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 88fcaa21b801..20b907d76ae2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -207,7 +207,6 @@ struct ceph_dentry_info { | |||
207 | struct dentry *dentry; | 207 | struct dentry *dentry; |
208 | u64 time; | 208 | u64 time; |
209 | u64 offset; | 209 | u64 offset; |
210 | struct inode *parent_inode; | ||
211 | }; | 210 | }; |
212 | 211 | ||
213 | struct ceph_inode_xattrs_info { | 212 | struct ceph_inode_xattrs_info { |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c3011beac30d..31d91a64838b 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -123,6 +123,7 @@ struct ceph_msg_pos { | |||
123 | #define SOCK_CLOSED 11 /* socket state changed to closed */ | 123 | #define SOCK_CLOSED 11 /* socket state changed to closed */ |
124 | #define OPENING 13 /* open connection w/ (possibly new) peer */ | 124 | #define OPENING 13 /* open connection w/ (possibly new) peer */ |
125 | #define DEAD 14 /* dead, about to kfree */ | 125 | #define DEAD 14 /* dead, about to kfree */ |
126 | #define BACKOFF 15 | ||
126 | 127 | ||
127 | /* | 128 | /* |
128 | * A single connection with another host. | 129 | * A single connection with another host. |
@@ -160,7 +161,6 @@ struct ceph_connection { | |||
160 | struct list_head out_queue; | 161 | struct list_head out_queue; |
161 | struct list_head out_sent; /* sending or sent but unacked */ | 162 | struct list_head out_sent; /* sending or sent but unacked */ |
162 | u64 out_seq; /* last message queued for send */ | 163 | u64 out_seq; /* last message queued for send */ |
163 | bool out_keepalive_pending; | ||
164 | 164 | ||
165 | u64 in_seq, in_seq_acked; /* last message received, acked */ | 165 | u64 in_seq, in_seq_acked; /* last message received, acked */ |
166 | 166 | ||
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 35b36b86d762..05f357828a2f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con) | |||
336 | ceph_msg_put(con->out_msg); | 336 | ceph_msg_put(con->out_msg); |
337 | con->out_msg = NULL; | 337 | con->out_msg = NULL; |
338 | } | 338 | } |
339 | con->out_keepalive_pending = false; | ||
340 | con->in_seq = 0; | 339 | con->in_seq = 0; |
341 | con->in_seq_acked = 0; | 340 | con->in_seq_acked = 0; |
342 | } | 341 | } |
@@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con) | |||
1248 | con->auth_retry); | 1247 | con->auth_retry); |
1249 | if (con->auth_retry == 2) { | 1248 | if (con->auth_retry == 2) { |
1250 | con->error_msg = "connect authorization failure"; | 1249 | con->error_msg = "connect authorization failure"; |
1251 | reset_connection(con); | ||
1252 | set_bit(CLOSED, &con->state); | ||
1253 | return -1; | 1250 | return -1; |
1254 | } | 1251 | } |
1255 | con->auth_retry = 1; | 1252 | con->auth_retry = 1; |
@@ -1715,14 +1712,6 @@ more: | |||
1715 | 1712 | ||
1716 | /* open the socket first? */ | 1713 | /* open the socket first? */ |
1717 | if (con->sock == NULL) { | 1714 | if (con->sock == NULL) { |
1718 | /* | ||
1719 | * if we were STANDBY and are reconnecting _this_ | ||
1720 | * connection, bump connect_seq now. Always bump | ||
1721 | * global_seq. | ||
1722 | */ | ||
1723 | if (test_and_clear_bit(STANDBY, &con->state)) | ||
1724 | con->connect_seq++; | ||
1725 | |||
1726 | prepare_write_banner(msgr, con); | 1715 | prepare_write_banner(msgr, con); |
1727 | prepare_write_connect(msgr, con, 1); | 1716 | prepare_write_connect(msgr, con, 1); |
1728 | prepare_read_banner(con); | 1717 | prepare_read_banner(con); |
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work) | |||
1951 | work.work); | 1940 | work.work); |
1952 | 1941 | ||
1953 | mutex_lock(&con->mutex); | 1942 | mutex_lock(&con->mutex); |
1943 | if (test_and_clear_bit(BACKOFF, &con->state)) { | ||
1944 | dout("con_work %p backing off\n", con); | ||
1945 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | ||
1946 | round_jiffies_relative(con->delay))) { | ||
1947 | dout("con_work %p backoff %lu\n", con, con->delay); | ||
1948 | mutex_unlock(&con->mutex); | ||
1949 | return; | ||
1950 | } else { | ||
1951 | con->ops->put(con); | ||
1952 | dout("con_work %p FAILED to back off %lu\n", con, | ||
1953 | con->delay); | ||
1954 | } | ||
1955 | } | ||
1954 | 1956 | ||
1957 | if (test_bit(STANDBY, &con->state)) { | ||
1958 | dout("con_work %p STANDBY\n", con); | ||
1959 | goto done; | ||
1960 | } | ||
1955 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1961 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
1956 | dout("con_work CLOSED\n"); | 1962 | dout("con_work CLOSED\n"); |
1957 | con_close_socket(con); | 1963 | con_close_socket(con); |
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con) | |||
2008 | /* Requeue anything that hasn't been acked */ | 2014 | /* Requeue anything that hasn't been acked */ |
2009 | list_splice_init(&con->out_sent, &con->out_queue); | 2015 | list_splice_init(&con->out_sent, &con->out_queue); |
2010 | 2016 | ||
2011 | /* If there are no messages in the queue, place the connection | 2017 | /* If there are no messages queued or keepalive pending, place |
2012 | * in a STANDBY state (i.e., don't try to reconnect just yet). */ | 2018 | * the connection in a STANDBY state */ |
2013 | if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { | 2019 | if (list_empty(&con->out_queue) && |
2014 | dout("fault setting STANDBY\n"); | 2020 | !test_bit(KEEPALIVE_PENDING, &con->state)) { |
2021 | dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); | ||
2022 | clear_bit(WRITE_PENDING, &con->state); | ||
2015 | set_bit(STANDBY, &con->state); | 2023 | set_bit(STANDBY, &con->state); |
2016 | } else { | 2024 | } else { |
2017 | /* retry after a delay. */ | 2025 | /* retry after a delay. */ |
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con) | |||
2019 | con->delay = BASE_DELAY_INTERVAL; | 2027 | con->delay = BASE_DELAY_INTERVAL; |
2020 | else if (con->delay < MAX_DELAY_INTERVAL) | 2028 | else if (con->delay < MAX_DELAY_INTERVAL) |
2021 | con->delay *= 2; | 2029 | con->delay *= 2; |
2022 | dout("fault queueing %p delay %lu\n", con, con->delay); | ||
2023 | con->ops->get(con); | 2030 | con->ops->get(con); |
2024 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | 2031 | if (queue_delayed_work(ceph_msgr_wq, &con->work, |
2025 | round_jiffies_relative(con->delay)) == 0) | 2032 | round_jiffies_relative(con->delay))) { |
2033 | dout("fault queued %p delay %lu\n", con, con->delay); | ||
2034 | } else { | ||
2026 | con->ops->put(con); | 2035 | con->ops->put(con); |
2036 | dout("fault failed to queue %p delay %lu, backoff\n", | ||
2037 | con, con->delay); | ||
2038 | /* | ||
2039 | * In many cases we see a socket state change | ||
2040 | * while con_work is running and end up | ||
2041 | * queuing (non-delayed) work, such that we | ||
2042 | * can't backoff with a delay. Set a flag so | ||
2043 | * that when con_work restarts we schedule the | ||
2044 | * delay then. | ||
2045 | */ | ||
2046 | set_bit(BACKOFF, &con->state); | ||
2047 | } | ||
2027 | } | 2048 | } |
2028 | 2049 | ||
2029 | out_unlock: | 2050 | out_unlock: |
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) | |||
2094 | } | 2115 | } |
2095 | EXPORT_SYMBOL(ceph_messenger_destroy); | 2116 | EXPORT_SYMBOL(ceph_messenger_destroy); |
2096 | 2117 | ||
2118 | static void clear_standby(struct ceph_connection *con) | ||
2119 | { | ||
2120 | /* come back from STANDBY? */ | ||
2121 | if (test_and_clear_bit(STANDBY, &con->state)) { | ||
2122 | mutex_lock(&con->mutex); | ||
2123 | dout("clear_standby %p and ++connect_seq\n", con); | ||
2124 | con->connect_seq++; | ||
2125 | WARN_ON(test_bit(WRITE_PENDING, &con->state)); | ||
2126 | WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); | ||
2127 | mutex_unlock(&con->mutex); | ||
2128 | } | ||
2129 | } | ||
2130 | |||
2097 | /* | 2131 | /* |
2098 | * Queue up an outgoing message on the given connection. | 2132 | * Queue up an outgoing message on the given connection. |
2099 | */ | 2133 | */ |
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
2126 | 2160 | ||
2127 | /* if there wasn't anything waiting to send before, queue | 2161 | /* if there wasn't anything waiting to send before, queue |
2128 | * new work */ | 2162 | * new work */ |
2163 | clear_standby(con); | ||
2129 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2164 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
2130 | queue_con(con); | 2165 | queue_con(con); |
2131 | } | 2166 | } |
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | |||
2191 | */ | 2226 | */ |
2192 | void ceph_con_keepalive(struct ceph_connection *con) | 2227 | void ceph_con_keepalive(struct ceph_connection *con) |
2193 | { | 2228 | { |
2229 | dout("con_keepalive %p\n", con); | ||
2230 | clear_standby(con); | ||
2194 | if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && | 2231 | if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && |
2195 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2232 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
2196 | queue_con(con); | 2233 | queue_con(con); |
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 1a040e64c69f..cd9c21df87d1 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c | |||
@@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data, | |||
16 | int num_pages, bool write_page) | 16 | int num_pages, bool write_page) |
17 | { | 17 | { |
18 | struct page **pages; | 18 | struct page **pages; |
19 | int rc; | 19 | int got = 0; |
20 | int rc = 0; | ||
20 | 21 | ||
21 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | 22 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); |
22 | if (!pages) | 23 | if (!pages) |
23 | return ERR_PTR(-ENOMEM); | 24 | return ERR_PTR(-ENOMEM); |
24 | 25 | ||
25 | down_read(&current->mm->mmap_sem); | 26 | down_read(&current->mm->mmap_sem); |
26 | rc = get_user_pages(current, current->mm, (unsigned long)data, | 27 | while (got < num_pages) { |
27 | num_pages, write_page, 0, pages, NULL); | 28 | rc = get_user_pages(current, current->mm, |
29 | (unsigned long)data + ((unsigned long)got * PAGE_SIZE), | ||
30 | num_pages - got, write_page, 0, pages + got, NULL); | ||
31 | if (rc < 0) | ||
32 | break; | ||
33 | BUG_ON(rc == 0); | ||
34 | got += rc; | ||
35 | } | ||
28 | up_read(&current->mm->mmap_sem); | 36 | up_read(&current->mm->mmap_sem); |
29 | if (rc < num_pages) | 37 | if (rc < 0) |
30 | goto fail; | 38 | goto fail; |
31 | return pages; | 39 | return pages; |
32 | 40 | ||
33 | fail: | 41 | fail: |
34 | ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); | 42 | ceph_put_page_vector(pages, got, false); |
35 | return ERR_PTR(rc); | 43 | return ERR_PTR(rc); |
36 | } | 44 | } |
37 | EXPORT_SYMBOL(ceph_get_direct_page_vector); | 45 | EXPORT_SYMBOL(ceph_get_direct_page_vector); |