diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-05 13:43:22 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-05 13:43:22 -0500 |
| commit | fb62c00a6d8942775abc23d1621db1252e2d93d1 (patch) | |
| tree | ef8760123f5a2b692126ecb7a70f2689053885c1 | |
| parent | 5c4b4be3b6b937256103a5ae49177e0c3a17cb8f (diff) | |
| parent | 455cec0abff563574cca432ced49f734117ca113 (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: no .snap inside of snapped namespace
libceph: fix msgr standby handling
libceph: fix msgr keepalive flag
libceph: fix msgr backoff
libceph: retry after authorization failure
libceph: fix handling of short returns from get_user_pages
ceph: do not clear I_COMPLETE from d_release
ceph: do not set I_COMPLETE
Revert "ceph: keep reference to parent inode on ceph_dentry"
| -rw-r--r-- | fs/ceph/dir.c | 28 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 2 | ||||
| -rw-r--r-- | fs/ceph/super.h | 1 | ||||
| -rw-r--r-- | include/linux/ceph/messenger.h | 2 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 71 | ||||
| -rw-r--r-- | net/ceph/pagevec.c | 18 |
6 files changed, 72 insertions, 50 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f0aef787a10..099a58615b9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry) | |||
| 60 | } | 60 | } |
| 61 | di->dentry = dentry; | 61 | di->dentry = dentry; |
| 62 | di->lease_session = NULL; | 62 | di->lease_session = NULL; |
| 63 | di->parent_inode = igrab(dentry->d_parent->d_inode); | ||
| 64 | dentry->d_fsdata = di; | 63 | dentry->d_fsdata = di; |
| 65 | dentry->d_time = jiffies; | 64 | dentry->d_time = jiffies; |
| 66 | ceph_dentry_lru_add(dentry); | 65 | ceph_dentry_lru_add(dentry); |
| @@ -410,7 +409,7 @@ more: | |||
| 410 | spin_lock(&inode->i_lock); | 409 | spin_lock(&inode->i_lock); |
| 411 | if (ci->i_release_count == fi->dir_release_count) { | 410 | if (ci->i_release_count == fi->dir_release_count) { |
| 412 | dout(" marking %p complete\n", inode); | 411 | dout(" marking %p complete\n", inode); |
| 413 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 412 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ |
| 414 | ci->i_max_offset = filp->f_pos; | 413 | ci->i_max_offset = filp->f_pos; |
| 415 | } | 414 | } |
| 416 | spin_unlock(&inode->i_lock); | 415 | spin_unlock(&inode->i_lock); |
| @@ -497,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
| 497 | 496 | ||
| 498 | /* .snap dir? */ | 497 | /* .snap dir? */ |
| 499 | if (err == -ENOENT && | 498 | if (err == -ENOENT && |
| 499 | ceph_snap(parent) == CEPH_NOSNAP && | ||
| 500 | strcmp(dentry->d_name.name, | 500 | strcmp(dentry->d_name.name, |
| 501 | fsc->mount_options->snapdir_name) == 0) { | 501 | fsc->mount_options->snapdir_name) == 0) { |
| 502 | struct inode *inode = ceph_get_snapdir(parent); | 502 | struct inode *inode = ceph_get_snapdir(parent); |
| @@ -1030,28 +1030,8 @@ out_touch: | |||
| 1030 | static void ceph_dentry_release(struct dentry *dentry) | 1030 | static void ceph_dentry_release(struct dentry *dentry) |
| 1031 | { | 1031 | { |
| 1032 | struct ceph_dentry_info *di = ceph_dentry(dentry); | 1032 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
| 1033 | struct inode *parent_inode = NULL; | ||
| 1034 | u64 snapid = CEPH_NOSNAP; | ||
| 1035 | 1033 | ||
| 1036 | if (!IS_ROOT(dentry)) { | 1034 | dout("dentry_release %p\n", dentry); |
| 1037 | parent_inode = di->parent_inode; | ||
| 1038 | if (parent_inode) | ||
| 1039 | snapid = ceph_snap(parent_inode); | ||
| 1040 | } | ||
| 1041 | dout("dentry_release %p parent %p\n", dentry, parent_inode); | ||
| 1042 | if (parent_inode && snapid != CEPH_SNAPDIR) { | ||
| 1043 | struct ceph_inode_info *ci = ceph_inode(parent_inode); | ||
| 1044 | |||
| 1045 | spin_lock(&parent_inode->i_lock); | ||
| 1046 | if (ci->i_shared_gen == di->lease_shared_gen || | ||
| 1047 | snapid <= CEPH_MAXSNAP) { | ||
| 1048 | dout(" clearing %p complete (d_release)\n", | ||
| 1049 | parent_inode); | ||
| 1050 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
| 1051 | ci->i_release_count++; | ||
| 1052 | } | ||
| 1053 | spin_unlock(&parent_inode->i_lock); | ||
| 1054 | } | ||
| 1055 | if (di) { | 1035 | if (di) { |
| 1056 | ceph_dentry_lru_del(dentry); | 1036 | ceph_dentry_lru_del(dentry); |
| 1057 | if (di->lease_session) | 1037 | if (di->lease_session) |
| @@ -1059,8 +1039,6 @@ static void ceph_dentry_release(struct dentry *dentry) | |||
| 1059 | kmem_cache_free(ceph_dentry_cachep, di); | 1039 | kmem_cache_free(ceph_dentry_cachep, di); |
| 1060 | dentry->d_fsdata = NULL; | 1040 | dentry->d_fsdata = NULL; |
| 1061 | } | 1041 | } |
| 1062 | if (parent_inode) | ||
| 1063 | iput(parent_inode); | ||
| 1064 | } | 1042 | } |
| 1065 | 1043 | ||
| 1066 | static int ceph_snapdir_d_revalidate(struct dentry *dentry, | 1044 | static int ceph_snapdir_d_revalidate(struct dentry *dentry, |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5625463aa47..193bfa5e9cb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode, | |||
| 707 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | 707 | (issued & CEPH_CAP_FILE_EXCL) == 0 && |
| 708 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | 708 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { |
| 709 | dout(" marking %p complete (empty)\n", inode); | 709 | dout(" marking %p complete (empty)\n", inode); |
| 710 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 710 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ |
| 711 | ci->i_max_offset = 2; | 711 | ci->i_max_offset = 2; |
| 712 | } | 712 | } |
| 713 | break; | 713 | break; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 88fcaa21b80..20b907d76ae 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -207,7 +207,6 @@ struct ceph_dentry_info { | |||
| 207 | struct dentry *dentry; | 207 | struct dentry *dentry; |
| 208 | u64 time; | 208 | u64 time; |
| 209 | u64 offset; | 209 | u64 offset; |
| 210 | struct inode *parent_inode; | ||
| 211 | }; | 210 | }; |
| 212 | 211 | ||
| 213 | struct ceph_inode_xattrs_info { | 212 | struct ceph_inode_xattrs_info { |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c3011beac30..31d91a64838 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -123,6 +123,7 @@ struct ceph_msg_pos { | |||
| 123 | #define SOCK_CLOSED 11 /* socket state changed to closed */ | 123 | #define SOCK_CLOSED 11 /* socket state changed to closed */ |
| 124 | #define OPENING 13 /* open connection w/ (possibly new) peer */ | 124 | #define OPENING 13 /* open connection w/ (possibly new) peer */ |
| 125 | #define DEAD 14 /* dead, about to kfree */ | 125 | #define DEAD 14 /* dead, about to kfree */ |
| 126 | #define BACKOFF 15 | ||
| 126 | 127 | ||
| 127 | /* | 128 | /* |
| 128 | * A single connection with another host. | 129 | * A single connection with another host. |
| @@ -160,7 +161,6 @@ struct ceph_connection { | |||
| 160 | struct list_head out_queue; | 161 | struct list_head out_queue; |
| 161 | struct list_head out_sent; /* sending or sent but unacked */ | 162 | struct list_head out_sent; /* sending or sent but unacked */ |
| 162 | u64 out_seq; /* last message queued for send */ | 163 | u64 out_seq; /* last message queued for send */ |
| 163 | bool out_keepalive_pending; | ||
| 164 | 164 | ||
| 165 | u64 in_seq, in_seq_acked; /* last message received, acked */ | 165 | u64 in_seq, in_seq_acked; /* last message received, acked */ |
| 166 | 166 | ||
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 35b36b86d76..05f357828a2 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con) | |||
| 336 | ceph_msg_put(con->out_msg); | 336 | ceph_msg_put(con->out_msg); |
| 337 | con->out_msg = NULL; | 337 | con->out_msg = NULL; |
| 338 | } | 338 | } |
| 339 | con->out_keepalive_pending = false; | ||
| 340 | con->in_seq = 0; | 339 | con->in_seq = 0; |
| 341 | con->in_seq_acked = 0; | 340 | con->in_seq_acked = 0; |
| 342 | } | 341 | } |
| @@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con) | |||
| 1248 | con->auth_retry); | 1247 | con->auth_retry); |
| 1249 | if (con->auth_retry == 2) { | 1248 | if (con->auth_retry == 2) { |
| 1250 | con->error_msg = "connect authorization failure"; | 1249 | con->error_msg = "connect authorization failure"; |
| 1251 | reset_connection(con); | ||
| 1252 | set_bit(CLOSED, &con->state); | ||
| 1253 | return -1; | 1250 | return -1; |
| 1254 | } | 1251 | } |
| 1255 | con->auth_retry = 1; | 1252 | con->auth_retry = 1; |
| @@ -1715,14 +1712,6 @@ more: | |||
| 1715 | 1712 | ||
| 1716 | /* open the socket first? */ | 1713 | /* open the socket first? */ |
| 1717 | if (con->sock == NULL) { | 1714 | if (con->sock == NULL) { |
| 1718 | /* | ||
| 1719 | * if we were STANDBY and are reconnecting _this_ | ||
| 1720 | * connection, bump connect_seq now. Always bump | ||
| 1721 | * global_seq. | ||
| 1722 | */ | ||
| 1723 | if (test_and_clear_bit(STANDBY, &con->state)) | ||
| 1724 | con->connect_seq++; | ||
| 1725 | |||
| 1726 | prepare_write_banner(msgr, con); | 1715 | prepare_write_banner(msgr, con); |
| 1727 | prepare_write_connect(msgr, con, 1); | 1716 | prepare_write_connect(msgr, con, 1); |
| 1728 | prepare_read_banner(con); | 1717 | prepare_read_banner(con); |
| @@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work) | |||
| 1951 | work.work); | 1940 | work.work); |
| 1952 | 1941 | ||
| 1953 | mutex_lock(&con->mutex); | 1942 | mutex_lock(&con->mutex); |
| 1943 | if (test_and_clear_bit(BACKOFF, &con->state)) { | ||
| 1944 | dout("con_work %p backing off\n", con); | ||
| 1945 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | ||
| 1946 | round_jiffies_relative(con->delay))) { | ||
| 1947 | dout("con_work %p backoff %lu\n", con, con->delay); | ||
| 1948 | mutex_unlock(&con->mutex); | ||
| 1949 | return; | ||
| 1950 | } else { | ||
| 1951 | con->ops->put(con); | ||
| 1952 | dout("con_work %p FAILED to back off %lu\n", con, | ||
| 1953 | con->delay); | ||
| 1954 | } | ||
| 1955 | } | ||
| 1954 | 1956 | ||
| 1957 | if (test_bit(STANDBY, &con->state)) { | ||
| 1958 | dout("con_work %p STANDBY\n", con); | ||
| 1959 | goto done; | ||
| 1960 | } | ||
| 1955 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1961 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
| 1956 | dout("con_work CLOSED\n"); | 1962 | dout("con_work CLOSED\n"); |
| 1957 | con_close_socket(con); | 1963 | con_close_socket(con); |
| @@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con) | |||
| 2008 | /* Requeue anything that hasn't been acked */ | 2014 | /* Requeue anything that hasn't been acked */ |
| 2009 | list_splice_init(&con->out_sent, &con->out_queue); | 2015 | list_splice_init(&con->out_sent, &con->out_queue); |
| 2010 | 2016 | ||
| 2011 | /* If there are no messages in the queue, place the connection | 2017 | /* If there are no messages queued or keepalive pending, place |
| 2012 | * in a STANDBY state (i.e., don't try to reconnect just yet). */ | 2018 | * the connection in a STANDBY state */ |
| 2013 | if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { | 2019 | if (list_empty(&con->out_queue) && |
| 2014 | dout("fault setting STANDBY\n"); | 2020 | !test_bit(KEEPALIVE_PENDING, &con->state)) { |
| 2021 | dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); | ||
| 2022 | clear_bit(WRITE_PENDING, &con->state); | ||
| 2015 | set_bit(STANDBY, &con->state); | 2023 | set_bit(STANDBY, &con->state); |
| 2016 | } else { | 2024 | } else { |
| 2017 | /* retry after a delay. */ | 2025 | /* retry after a delay. */ |
| @@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con) | |||
| 2019 | con->delay = BASE_DELAY_INTERVAL; | 2027 | con->delay = BASE_DELAY_INTERVAL; |
| 2020 | else if (con->delay < MAX_DELAY_INTERVAL) | 2028 | else if (con->delay < MAX_DELAY_INTERVAL) |
| 2021 | con->delay *= 2; | 2029 | con->delay *= 2; |
| 2022 | dout("fault queueing %p delay %lu\n", con, con->delay); | ||
| 2023 | con->ops->get(con); | 2030 | con->ops->get(con); |
| 2024 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | 2031 | if (queue_delayed_work(ceph_msgr_wq, &con->work, |
| 2025 | round_jiffies_relative(con->delay)) == 0) | 2032 | round_jiffies_relative(con->delay))) { |
| 2033 | dout("fault queued %p delay %lu\n", con, con->delay); | ||
| 2034 | } else { | ||
| 2026 | con->ops->put(con); | 2035 | con->ops->put(con); |
| 2036 | dout("fault failed to queue %p delay %lu, backoff\n", | ||
| 2037 | con, con->delay); | ||
| 2038 | /* | ||
| 2039 | * In many cases we see a socket state change | ||
| 2040 | * while con_work is running and end up | ||
| 2041 | * queuing (non-delayed) work, such that we | ||
| 2042 | * can't backoff with a delay. Set a flag so | ||
| 2043 | * that when con_work restarts we schedule the | ||
| 2044 | * delay then. | ||
| 2045 | */ | ||
| 2046 | set_bit(BACKOFF, &con->state); | ||
| 2047 | } | ||
| 2027 | } | 2048 | } |
| 2028 | 2049 | ||
| 2029 | out_unlock: | 2050 | out_unlock: |
| @@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) | |||
| 2094 | } | 2115 | } |
| 2095 | EXPORT_SYMBOL(ceph_messenger_destroy); | 2116 | EXPORT_SYMBOL(ceph_messenger_destroy); |
| 2096 | 2117 | ||
| 2118 | static void clear_standby(struct ceph_connection *con) | ||
| 2119 | { | ||
| 2120 | /* come back from STANDBY? */ | ||
| 2121 | if (test_and_clear_bit(STANDBY, &con->state)) { | ||
| 2122 | mutex_lock(&con->mutex); | ||
| 2123 | dout("clear_standby %p and ++connect_seq\n", con); | ||
| 2124 | con->connect_seq++; | ||
| 2125 | WARN_ON(test_bit(WRITE_PENDING, &con->state)); | ||
| 2126 | WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); | ||
| 2127 | mutex_unlock(&con->mutex); | ||
| 2128 | } | ||
| 2129 | } | ||
| 2130 | |||
| 2097 | /* | 2131 | /* |
| 2098 | * Queue up an outgoing message on the given connection. | 2132 | * Queue up an outgoing message on the given connection. |
| 2099 | */ | 2133 | */ |
| @@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2126 | 2160 | ||
| 2127 | /* if there wasn't anything waiting to send before, queue | 2161 | /* if there wasn't anything waiting to send before, queue |
| 2128 | * new work */ | 2162 | * new work */ |
| 2163 | clear_standby(con); | ||
| 2129 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2164 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
| 2130 | queue_con(con); | 2165 | queue_con(con); |
| 2131 | } | 2166 | } |
| @@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2191 | */ | 2226 | */ |
| 2192 | void ceph_con_keepalive(struct ceph_connection *con) | 2227 | void ceph_con_keepalive(struct ceph_connection *con) |
| 2193 | { | 2228 | { |
| 2229 | dout("con_keepalive %p\n", con); | ||
| 2230 | clear_standby(con); | ||
| 2194 | if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && | 2231 | if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && |
| 2195 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2232 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
| 2196 | queue_con(con); | 2233 | queue_con(con); |
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 1a040e64c69..cd9c21df87d 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c | |||
| @@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data, | |||
| 16 | int num_pages, bool write_page) | 16 | int num_pages, bool write_page) |
| 17 | { | 17 | { |
| 18 | struct page **pages; | 18 | struct page **pages; |
| 19 | int rc; | 19 | int got = 0; |
| 20 | int rc = 0; | ||
| 20 | 21 | ||
| 21 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | 22 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); |
| 22 | if (!pages) | 23 | if (!pages) |
| 23 | return ERR_PTR(-ENOMEM); | 24 | return ERR_PTR(-ENOMEM); |
| 24 | 25 | ||
| 25 | down_read(&current->mm->mmap_sem); | 26 | down_read(&current->mm->mmap_sem); |
| 26 | rc = get_user_pages(current, current->mm, (unsigned long)data, | 27 | while (got < num_pages) { |
| 27 | num_pages, write_page, 0, pages, NULL); | 28 | rc = get_user_pages(current, current->mm, |
| 29 | (unsigned long)data + ((unsigned long)got * PAGE_SIZE), | ||
| 30 | num_pages - got, write_page, 0, pages + got, NULL); | ||
| 31 | if (rc < 0) | ||
| 32 | break; | ||
| 33 | BUG_ON(rc == 0); | ||
| 34 | got += rc; | ||
| 35 | } | ||
| 28 | up_read(&current->mm->mmap_sem); | 36 | up_read(&current->mm->mmap_sem); |
| 29 | if (rc < num_pages) | 37 | if (rc < 0) |
| 30 | goto fail; | 38 | goto fail; |
| 31 | return pages; | 39 | return pages; |
| 32 | 40 | ||
| 33 | fail: | 41 | fail: |
| 34 | ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); | 42 | ceph_put_page_vector(pages, got, false); |
| 35 | return ERR_PTR(rc); | 43 | return ERR_PTR(rc); |
| 36 | } | 44 | } |
| 37 | EXPORT_SYMBOL(ceph_get_direct_page_vector); | 45 | EXPORT_SYMBOL(ceph_get_direct_page_vector); |
