aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/messenger.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/messenger.c')
-rw-r--r--net/ceph/messenger.c133
1 files changed, 85 insertions, 48 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dff633d62e5b..05f357828a2f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -252,8 +252,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
252{ 252{
253 struct kvec iov = {buf, len}; 253 struct kvec iov = {buf, len};
254 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 254 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
255 int r;
255 256
256 return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); 257 r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
258 if (r == -EAGAIN)
259 r = 0;
260 return r;
257} 261}
258 262
259/* 263/*
@@ -264,13 +268,17 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
264 size_t kvlen, size_t len, int more) 268 size_t kvlen, size_t len, int more)
265{ 269{
266 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 270 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
271 int r;
267 272
268 if (more) 273 if (more)
269 msg.msg_flags |= MSG_MORE; 274 msg.msg_flags |= MSG_MORE;
270 else 275 else
271 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ 276 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
272 277
273 return kernel_sendmsg(sock, &msg, iov, kvlen, len); 278 r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
279 if (r == -EAGAIN)
280 r = 0;
281 return r;
274} 282}
275 283
276 284
@@ -328,7 +336,6 @@ static void reset_connection(struct ceph_connection *con)
328 ceph_msg_put(con->out_msg); 336 ceph_msg_put(con->out_msg);
329 con->out_msg = NULL; 337 con->out_msg = NULL;
330 } 338 }
331 con->out_keepalive_pending = false;
332 con->in_seq = 0; 339 con->in_seq = 0;
333 con->in_seq_acked = 0; 340 con->in_seq_acked = 0;
334} 341}
@@ -847,6 +854,8 @@ static int write_partial_msg_pages(struct ceph_connection *con)
847 (msg->pages || msg->pagelist || msg->bio || in_trail)) 854 (msg->pages || msg->pagelist || msg->bio || in_trail))
848 kunmap(page); 855 kunmap(page);
849 856
857 if (ret == -EAGAIN)
858 ret = 0;
850 if (ret <= 0) 859 if (ret <= 0)
851 goto out; 860 goto out;
852 861
@@ -1238,8 +1247,6 @@ static int process_connect(struct ceph_connection *con)
1238 con->auth_retry); 1247 con->auth_retry);
1239 if (con->auth_retry == 2) { 1248 if (con->auth_retry == 2) {
1240 con->error_msg = "connect authorization failure"; 1249 con->error_msg = "connect authorization failure";
1241 reset_connection(con);
1242 set_bit(CLOSED, &con->state);
1243 return -1; 1250 return -1;
1244 } 1251 }
1245 con->auth_retry = 1; 1252 con->auth_retry = 1;
@@ -1705,14 +1712,6 @@ more:
1705 1712
1706 /* open the socket first? */ 1713 /* open the socket first? */
1707 if (con->sock == NULL) { 1714 if (con->sock == NULL) {
1708 /*
1709 * if we were STANDBY and are reconnecting _this_
1710 * connection, bump connect_seq now. Always bump
1711 * global_seq.
1712 */
1713 if (test_and_clear_bit(STANDBY, &con->state))
1714 con->connect_seq++;
1715
1716 prepare_write_banner(msgr, con); 1715 prepare_write_banner(msgr, con);
1717 prepare_write_connect(msgr, con, 1); 1716 prepare_write_connect(msgr, con, 1);
1718 prepare_read_banner(con); 1717 prepare_read_banner(con);
@@ -1737,16 +1736,12 @@ more_kvec:
1737 if (con->out_skip) { 1736 if (con->out_skip) {
1738 ret = write_partial_skip(con); 1737 ret = write_partial_skip(con);
1739 if (ret <= 0) 1738 if (ret <= 0)
1740 goto done; 1739 goto out;
1741 if (ret < 0) {
1742 dout("try_write write_partial_skip err %d\n", ret);
1743 goto done;
1744 }
1745 } 1740 }
1746 if (con->out_kvec_left) { 1741 if (con->out_kvec_left) {
1747 ret = write_partial_kvec(con); 1742 ret = write_partial_kvec(con);
1748 if (ret <= 0) 1743 if (ret <= 0)
1749 goto done; 1744 goto out;
1750 } 1745 }
1751 1746
1752 /* msg pages? */ 1747 /* msg pages? */
@@ -1761,11 +1756,11 @@ more_kvec:
1761 if (ret == 1) 1756 if (ret == 1)
1762 goto more_kvec; /* we need to send the footer, too! */ 1757 goto more_kvec; /* we need to send the footer, too! */
1763 if (ret == 0) 1758 if (ret == 0)
1764 goto done; 1759 goto out;
1765 if (ret < 0) { 1760 if (ret < 0) {
1766 dout("try_write write_partial_msg_pages err %d\n", 1761 dout("try_write write_partial_msg_pages err %d\n",
1767 ret); 1762 ret);
1768 goto done; 1763 goto out;
1769 } 1764 }
1770 } 1765 }
1771 1766
@@ -1789,10 +1784,9 @@ do_next:
1789 /* Nothing to do! */ 1784 /* Nothing to do! */
1790 clear_bit(WRITE_PENDING, &con->state); 1785 clear_bit(WRITE_PENDING, &con->state);
1791 dout("try_write nothing else to write.\n"); 1786 dout("try_write nothing else to write.\n");
1792done:
1793 ret = 0; 1787 ret = 0;
1794out: 1788out:
1795 dout("try_write done on %p\n", con); 1789 dout("try_write done on %p ret %d\n", con, ret);
1796 return ret; 1790 return ret;
1797} 1791}
1798 1792
@@ -1821,19 +1815,17 @@ more:
1821 dout("try_read connecting\n"); 1815 dout("try_read connecting\n");
1822 ret = read_partial_banner(con); 1816 ret = read_partial_banner(con);
1823 if (ret <= 0) 1817 if (ret <= 0)
1824 goto done;
1825 if (process_banner(con) < 0) {
1826 ret = -1;
1827 goto out; 1818 goto out;
1828 } 1819 ret = process_banner(con);
1820 if (ret < 0)
1821 goto out;
1829 } 1822 }
1830 ret = read_partial_connect(con); 1823 ret = read_partial_connect(con);
1831 if (ret <= 0) 1824 if (ret <= 0)
1832 goto done;
1833 if (process_connect(con) < 0) {
1834 ret = -1;
1835 goto out; 1825 goto out;
1836 } 1826 ret = process_connect(con);
1827 if (ret < 0)
1828 goto out;
1837 goto more; 1829 goto more;
1838 } 1830 }
1839 1831
@@ -1848,7 +1840,7 @@ more:
1848 dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); 1840 dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
1849 ret = ceph_tcp_recvmsg(con->sock, buf, skip); 1841 ret = ceph_tcp_recvmsg(con->sock, buf, skip);
1850 if (ret <= 0) 1842 if (ret <= 0)
1851 goto done; 1843 goto out;
1852 con->in_base_pos += ret; 1844 con->in_base_pos += ret;
1853 if (con->in_base_pos) 1845 if (con->in_base_pos)
1854 goto more; 1846 goto more;
@@ -1859,7 +1851,7 @@ more:
1859 */ 1851 */
1860 ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1); 1852 ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
1861 if (ret <= 0) 1853 if (ret <= 0)
1862 goto done; 1854 goto out;
1863 dout("try_read got tag %d\n", (int)con->in_tag); 1855 dout("try_read got tag %d\n", (int)con->in_tag);
1864 switch (con->in_tag) { 1856 switch (con->in_tag) {
1865 case CEPH_MSGR_TAG_MSG: 1857 case CEPH_MSGR_TAG_MSG:
@@ -1870,7 +1862,7 @@ more:
1870 break; 1862 break;
1871 case CEPH_MSGR_TAG_CLOSE: 1863 case CEPH_MSGR_TAG_CLOSE:
1872 set_bit(CLOSED, &con->state); /* fixme */ 1864 set_bit(CLOSED, &con->state); /* fixme */
1873 goto done; 1865 goto out;
1874 default: 1866 default:
1875 goto bad_tag; 1867 goto bad_tag;
1876 } 1868 }
@@ -1882,13 +1874,12 @@ more:
1882 case -EBADMSG: 1874 case -EBADMSG:
1883 con->error_msg = "bad crc"; 1875 con->error_msg = "bad crc";
1884 ret = -EIO; 1876 ret = -EIO;
1885 goto out; 1877 break;
1886 case -EIO: 1878 case -EIO:
1887 con->error_msg = "io error"; 1879 con->error_msg = "io error";
1888 goto out; 1880 break;
1889 default:
1890 goto done;
1891 } 1881 }
1882 goto out;
1892 } 1883 }
1893 if (con->in_tag == CEPH_MSGR_TAG_READY) 1884 if (con->in_tag == CEPH_MSGR_TAG_READY)
1894 goto more; 1885 goto more;
@@ -1898,15 +1889,13 @@ more:
1898 if (con->in_tag == CEPH_MSGR_TAG_ACK) { 1889 if (con->in_tag == CEPH_MSGR_TAG_ACK) {
1899 ret = read_partial_ack(con); 1890 ret = read_partial_ack(con);
1900 if (ret <= 0) 1891 if (ret <= 0)
1901 goto done; 1892 goto out;
1902 process_ack(con); 1893 process_ack(con);
1903 goto more; 1894 goto more;
1904 } 1895 }
1905 1896
1906done:
1907 ret = 0;
1908out: 1897out:
1909 dout("try_read done on %p\n", con); 1898 dout("try_read done on %p ret %d\n", con, ret);
1910 return ret; 1899 return ret;
1911 1900
1912bad_tag: 1901bad_tag:
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work)
1951 work.work); 1940 work.work);
1952 1941
1953 mutex_lock(&con->mutex); 1942 mutex_lock(&con->mutex);
1943 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 dout("con_work %p backing off\n", con);
1945 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1946 round_jiffies_relative(con->delay))) {
1947 dout("con_work %p backoff %lu\n", con, con->delay);
1948 mutex_unlock(&con->mutex);
1949 return;
1950 } else {
1951 con->ops->put(con);
1952 dout("con_work %p FAILED to back off %lu\n", con,
1953 con->delay);
1954 }
1955 }
1954 1956
1957 if (test_bit(STANDBY, &con->state)) {
1958 dout("con_work %p STANDBY\n", con);
1959 goto done;
1960 }
1955 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1961 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
1956 dout("con_work CLOSED\n"); 1962 dout("con_work CLOSED\n");
1957 con_close_socket(con); 1963 con_close_socket(con);
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con)
2008 /* Requeue anything that hasn't been acked */ 2014 /* Requeue anything that hasn't been acked */
2009 list_splice_init(&con->out_sent, &con->out_queue); 2015 list_splice_init(&con->out_sent, &con->out_queue);
2010 2016
2011 /* If there are no messages in the queue, place the connection 2017 /* If there are no messages queued or keepalive pending, place
2012 * in a STANDBY state (i.e., don't try to reconnect just yet). */ 2018 * the connection in a STANDBY state */
2013 if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 2019 if (list_empty(&con->out_queue) &&
2014 dout("fault setting STANDBY\n"); 2020 !test_bit(KEEPALIVE_PENDING, &con->state)) {
2021 dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
2022 clear_bit(WRITE_PENDING, &con->state);
2015 set_bit(STANDBY, &con->state); 2023 set_bit(STANDBY, &con->state);
2016 } else { 2024 } else {
2017 /* retry after a delay. */ 2025 /* retry after a delay. */
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con)
2019 con->delay = BASE_DELAY_INTERVAL; 2027 con->delay = BASE_DELAY_INTERVAL;
2020 else if (con->delay < MAX_DELAY_INTERVAL) 2028 else if (con->delay < MAX_DELAY_INTERVAL)
2021 con->delay *= 2; 2029 con->delay *= 2;
2022 dout("fault queueing %p delay %lu\n", con, con->delay);
2023 con->ops->get(con); 2030 con->ops->get(con);
2024 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2031 if (queue_delayed_work(ceph_msgr_wq, &con->work,
2025 round_jiffies_relative(con->delay)) == 0) 2032 round_jiffies_relative(con->delay))) {
2033 dout("fault queued %p delay %lu\n", con, con->delay);
2034 } else {
2026 con->ops->put(con); 2035 con->ops->put(con);
2036 dout("fault failed to queue %p delay %lu, backoff\n",
2037 con, con->delay);
2038 /*
2039 * In many cases we see a socket state change
2040 * while con_work is running and end up
2041 * queuing (non-delayed) work, such that we
2042 * can't backoff with a delay. Set a flag so
2043 * that when con_work restarts we schedule the
2044 * delay then.
2045 */
2046 set_bit(BACKOFF, &con->state);
2047 }
2027 } 2048 }
2028 2049
2029out_unlock: 2050out_unlock:
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
2094} 2115}
2095EXPORT_SYMBOL(ceph_messenger_destroy); 2116EXPORT_SYMBOL(ceph_messenger_destroy);
2096 2117
2118static void clear_standby(struct ceph_connection *con)
2119{
2120 /* come back from STANDBY? */
2121 if (test_and_clear_bit(STANDBY, &con->state)) {
2122 mutex_lock(&con->mutex);
2123 dout("clear_standby %p and ++connect_seq\n", con);
2124 con->connect_seq++;
2125 WARN_ON(test_bit(WRITE_PENDING, &con->state));
2126 WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
2127 mutex_unlock(&con->mutex);
2128 }
2129}
2130
2097/* 2131/*
2098 * Queue up an outgoing message on the given connection. 2132 * Queue up an outgoing message on the given connection.
2099 */ 2133 */
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
2126 2160
2127 /* if there wasn't anything waiting to send before, queue 2161 /* if there wasn't anything waiting to send before, queue
2128 * new work */ 2162 * new work */
2163 clear_standby(con);
2129 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2130 queue_con(con); 2165 queue_con(con);
2131} 2166}
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2191 */ 2226 */
2192void ceph_con_keepalive(struct ceph_connection *con) 2227void ceph_con_keepalive(struct ceph_connection *con)
2193{ 2228{
2229 dout("con_keepalive %p\n", con);
2230 clear_standby(con);
2194 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2231 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
2195 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2232 test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2196 queue_con(con); 2233 queue_con(con);