diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2009-03-11 14:38:03 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2009-03-19 15:17:34 -0400 |
commit | 7d1e8255cf959fba7ee2317550dfde39f0b936ae (patch) | |
tree | 0a046b83e5ac29501849c34408059480d4c210be | |
parent | 5e3771ce2d6a69e10fcc870cdf226d121d868491 (diff) |
SUNRPC: Add the equivalent of the linger and linger2 timeouts to RPC sockets
This fixes a regression against FreeBSD servers as reported by Tomas
Kasparek. Apparently when using RPC over a TCP socket, the FreeBSD servers
don't ever react to the client closing the socket, and so commit
e06799f958bf7f9f8fae15f0c6f519953fb0257c (SUNRPC: Use shutdown() instead of
close() when disconnecting a TCP socket) causes the setup to hang forever
whenever the client attempts to close and then reconnect.
We break the deadlock by adding a 'linger2'-style timeout to the socket,
after which the client will abort the connection using a TCP 'RST'.
The default timeout is set to 15 seconds. A subsequent patch will put it
under user control by means of a sysctl.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | include/linux/sunrpc/xprt.h | 1 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 98 |
2 files changed, 82 insertions, 17 deletions
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2b0d960603b9..1758d9f5b5c3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); | |||
260 | #define XPRT_BOUND (4) | 260 | #define XPRT_BOUND (4) |
261 | #define XPRT_BINDING (5) | 261 | #define XPRT_BINDING (5) |
262 | #define XPRT_CLOSING (6) | 262 | #define XPRT_CLOSING (6) |
263 | #define XPRT_CONNECTION_ABORT (7) | ||
263 | 264 | ||
264 | static inline void xprt_set_connected(struct rpc_xprt *xprt) | 265 | static inline void xprt_set_connected(struct rpc_xprt *xprt) |
265 | { | 266 | { |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2e070679ab4a..b51f58b95c39 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -49,6 +49,8 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | |||
49 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | 49 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; |
50 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | 50 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; |
51 | 51 | ||
52 | #define XS_TCP_LINGER_TO (15U * HZ) | ||
53 | |||
52 | /* | 54 | /* |
53 | * We can register our own files under /proc/sys/sunrpc by | 55 | * We can register our own files under /proc/sys/sunrpc by |
54 | * calling register_sysctl_table() again. The files in that | 56 | * calling register_sysctl_table() again. The files in that |
@@ -806,6 +808,7 @@ static void xs_close(struct rpc_xprt *xprt) | |||
806 | xs_reset_transport(transport); | 808 | xs_reset_transport(transport); |
807 | 809 | ||
808 | smp_mb__before_clear_bit(); | 810 | smp_mb__before_clear_bit(); |
811 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
809 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 812 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); |
810 | clear_bit(XPRT_CLOSING, &xprt->state); | 813 | clear_bit(XPRT_CLOSING, &xprt->state); |
811 | smp_mb__after_clear_bit(); | 814 | smp_mb__after_clear_bit(); |
@@ -1133,6 +1136,47 @@ out: | |||
1133 | read_unlock(&sk->sk_callback_lock); | 1136 | read_unlock(&sk->sk_callback_lock); |
1134 | } | 1137 | } |
1135 | 1138 | ||
1139 | /* | ||
1140 | * Do the equivalent of linger/linger2 handling for dealing with | ||
1141 | * broken servers that don't close the socket in a timely | ||
1142 | * fashion | ||
1143 | */ | ||
1144 | static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, | ||
1145 | unsigned long timeout) | ||
1146 | { | ||
1147 | struct sock_xprt *transport; | ||
1148 | |||
1149 | if (xprt_test_and_set_connecting(xprt)) | ||
1150 | return; | ||
1151 | set_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
1152 | transport = container_of(xprt, struct sock_xprt, xprt); | ||
1153 | queue_delayed_work(rpciod_workqueue, &transport->connect_worker, | ||
1154 | timeout); | ||
1155 | } | ||
1156 | |||
1157 | static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) | ||
1158 | { | ||
1159 | struct sock_xprt *transport; | ||
1160 | |||
1161 | transport = container_of(xprt, struct sock_xprt, xprt); | ||
1162 | |||
1163 | if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || | ||
1164 | !cancel_delayed_work(&transport->connect_worker)) | ||
1165 | return; | ||
1166 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
1167 | xprt_clear_connecting(xprt); | ||
1168 | } | ||
1169 | |||
1170 | static void xs_sock_mark_closed(struct rpc_xprt *xprt) | ||
1171 | { | ||
1172 | smp_mb__before_clear_bit(); | ||
1173 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | ||
1174 | clear_bit(XPRT_CLOSING, &xprt->state); | ||
1175 | smp_mb__after_clear_bit(); | ||
1176 | /* Mark transport as closed and wake up all pending tasks */ | ||
1177 | xprt_disconnect_done(xprt); | ||
1178 | } | ||
1179 | |||
1136 | /** | 1180 | /** |
1137 | * xs_tcp_state_change - callback to handle TCP socket state changes | 1181 | * xs_tcp_state_change - callback to handle TCP socket state changes |
1138 | * @sk: socket whose state has changed | 1182 | * @sk: socket whose state has changed |
@@ -1178,6 +1222,7 @@ static void xs_tcp_state_change(struct sock *sk) | |||
1178 | clear_bit(XPRT_CONNECTED, &xprt->state); | 1222 | clear_bit(XPRT_CONNECTED, &xprt->state); |
1179 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 1223 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); |
1180 | smp_mb__after_clear_bit(); | 1224 | smp_mb__after_clear_bit(); |
1225 | xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); | ||
1181 | break; | 1226 | break; |
1182 | case TCP_CLOSE_WAIT: | 1227 | case TCP_CLOSE_WAIT: |
1183 | /* The server initiated a shutdown of the socket */ | 1228 | /* The server initiated a shutdown of the socket */ |
@@ -1194,17 +1239,14 @@ static void xs_tcp_state_change(struct sock *sk) | |||
1194 | break; | 1239 | break; |
1195 | case TCP_LAST_ACK: | 1240 | case TCP_LAST_ACK: |
1196 | set_bit(XPRT_CLOSING, &xprt->state); | 1241 | set_bit(XPRT_CLOSING, &xprt->state); |
1242 | xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); | ||
1197 | smp_mb__before_clear_bit(); | 1243 | smp_mb__before_clear_bit(); |
1198 | clear_bit(XPRT_CONNECTED, &xprt->state); | 1244 | clear_bit(XPRT_CONNECTED, &xprt->state); |
1199 | smp_mb__after_clear_bit(); | 1245 | smp_mb__after_clear_bit(); |
1200 | break; | 1246 | break; |
1201 | case TCP_CLOSE: | 1247 | case TCP_CLOSE: |
1202 | smp_mb__before_clear_bit(); | 1248 | xs_tcp_cancel_linger_timeout(xprt); |
1203 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 1249 | xs_sock_mark_closed(xprt); |
1204 | clear_bit(XPRT_CLOSING, &xprt->state); | ||
1205 | smp_mb__after_clear_bit(); | ||
1206 | /* Mark transport as closed and wake up all pending tasks */ | ||
1207 | xprt_disconnect_done(xprt); | ||
1208 | } | 1250 | } |
1209 | out: | 1251 | out: |
1210 | read_unlock(&sk->sk_callback_lock); | 1252 | read_unlock(&sk->sk_callback_lock); |
@@ -1562,8 +1604,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) | |||
1562 | xs_udp_finish_connecting(xprt, sock); | 1604 | xs_udp_finish_connecting(xprt, sock); |
1563 | status = 0; | 1605 | status = 0; |
1564 | out: | 1606 | out: |
1565 | xprt_wake_pending_tasks(xprt, status); | ||
1566 | xprt_clear_connecting(xprt); | 1607 | xprt_clear_connecting(xprt); |
1608 | xprt_wake_pending_tasks(xprt, status); | ||
1567 | } | 1609 | } |
1568 | 1610 | ||
1569 | /** | 1611 | /** |
@@ -1604,8 +1646,8 @@ static void xs_udp_connect_worker6(struct work_struct *work) | |||
1604 | xs_udp_finish_connecting(xprt, sock); | 1646 | xs_udp_finish_connecting(xprt, sock); |
1605 | status = 0; | 1647 | status = 0; |
1606 | out: | 1648 | out: |
1607 | xprt_wake_pending_tasks(xprt, status); | ||
1608 | xprt_clear_connecting(xprt); | 1649 | xprt_clear_connecting(xprt); |
1650 | xprt_wake_pending_tasks(xprt, status); | ||
1609 | } | 1651 | } |
1610 | 1652 | ||
1611 | /* | 1653 | /* |
@@ -1626,7 +1668,9 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo | |||
1626 | memset(&any, 0, sizeof(any)); | 1668 | memset(&any, 0, sizeof(any)); |
1627 | any.sa_family = AF_UNSPEC; | 1669 | any.sa_family = AF_UNSPEC; |
1628 | result = kernel_connect(transport->sock, &any, sizeof(any), 0); | 1670 | result = kernel_connect(transport->sock, &any, sizeof(any), 0); |
1629 | if (result) | 1671 | if (!result) |
1672 | xs_sock_mark_closed(xprt); | ||
1673 | else | ||
1630 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | 1674 | dprintk("RPC: AF_UNSPEC connect return code %d\n", |
1631 | result); | 1675 | result); |
1632 | } | 1676 | } |
@@ -1702,6 +1746,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) | |||
1702 | goto out; | 1746 | goto out; |
1703 | 1747 | ||
1704 | if (!sock) { | 1748 | if (!sock) { |
1749 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
1705 | /* start from scratch */ | 1750 | /* start from scratch */ |
1706 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | 1751 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { |
1707 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | 1752 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); |
@@ -1713,10 +1758,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) | |||
1713 | sock_release(sock); | 1758 | sock_release(sock); |
1714 | goto out; | 1759 | goto out; |
1715 | } | 1760 | } |
1716 | } else | 1761 | } else { |
1762 | int abort_and_exit; | ||
1763 | |||
1764 | abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, | ||
1765 | &xprt->state); | ||
1717 | /* "close" the socket, preserving the local port */ | 1766 | /* "close" the socket, preserving the local port */ |
1718 | xs_tcp_reuse_connection(xprt, transport); | 1767 | xs_tcp_reuse_connection(xprt, transport); |
1719 | 1768 | ||
1769 | if (abort_and_exit) | ||
1770 | goto out_eagain; | ||
1771 | } | ||
1772 | |||
1720 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | 1773 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
1721 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | 1774 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
1722 | 1775 | ||
@@ -1732,17 +1785,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) | |||
1732 | case 0: | 1785 | case 0: |
1733 | case -EINPROGRESS: | 1786 | case -EINPROGRESS: |
1734 | case -EALREADY: | 1787 | case -EALREADY: |
1735 | goto out_clear; | 1788 | xprt_clear_connecting(xprt); |
1789 | return; | ||
1736 | } | 1790 | } |
1737 | /* get rid of existing socket, and retry */ | 1791 | /* get rid of existing socket, and retry */ |
1738 | xs_tcp_shutdown(xprt); | 1792 | xs_tcp_shutdown(xprt); |
1739 | printk("%s: connect returned unhandled error %d\n", | 1793 | printk("%s: connect returned unhandled error %d\n", |
1740 | __func__, status); | 1794 | __func__, status); |
1795 | out_eagain: | ||
1741 | status = -EAGAIN; | 1796 | status = -EAGAIN; |
1742 | out: | 1797 | out: |
1743 | xprt_wake_pending_tasks(xprt, status); | ||
1744 | out_clear: | ||
1745 | xprt_clear_connecting(xprt); | 1798 | xprt_clear_connecting(xprt); |
1799 | xprt_wake_pending_tasks(xprt, status); | ||
1746 | } | 1800 | } |
1747 | 1801 | ||
1748 | /** | 1802 | /** |
@@ -1763,6 +1817,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) | |||
1763 | goto out; | 1817 | goto out; |
1764 | 1818 | ||
1765 | if (!sock) { | 1819 | if (!sock) { |
1820 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
1766 | /* start from scratch */ | 1821 | /* start from scratch */ |
1767 | if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | 1822 | if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { |
1768 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | 1823 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); |
@@ -1774,10 +1829,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) | |||
1774 | sock_release(sock); | 1829 | sock_release(sock); |
1775 | goto out; | 1830 | goto out; |
1776 | } | 1831 | } |
1777 | } else | 1832 | } else { |
1833 | int abort_and_exit; | ||
1834 | |||
1835 | abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, | ||
1836 | &xprt->state); | ||
1778 | /* "close" the socket, preserving the local port */ | 1837 | /* "close" the socket, preserving the local port */ |
1779 | xs_tcp_reuse_connection(xprt, transport); | 1838 | xs_tcp_reuse_connection(xprt, transport); |
1780 | 1839 | ||
1840 | if (abort_and_exit) | ||
1841 | goto out_eagain; | ||
1842 | } | ||
1843 | |||
1781 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | 1844 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
1782 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | 1845 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
1783 | 1846 | ||
@@ -1792,17 +1855,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) | |||
1792 | case 0: | 1855 | case 0: |
1793 | case -EINPROGRESS: | 1856 | case -EINPROGRESS: |
1794 | case -EALREADY: | 1857 | case -EALREADY: |
1795 | goto out_clear; | 1858 | xprt_clear_connecting(xprt); |
1859 | return; | ||
1796 | } | 1860 | } |
1797 | /* get rid of existing socket, and retry */ | 1861 | /* get rid of existing socket, and retry */ |
1798 | xs_tcp_shutdown(xprt); | 1862 | xs_tcp_shutdown(xprt); |
1799 | printk("%s: connect returned unhandled error %d\n", | 1863 | printk("%s: connect returned unhandled error %d\n", |
1800 | __func__, status); | 1864 | __func__, status); |
1865 | out_eagain: | ||
1801 | status = -EAGAIN; | 1866 | status = -EAGAIN; |
1802 | out: | 1867 | out: |
1803 | xprt_wake_pending_tasks(xprt, status); | ||
1804 | out_clear: | ||
1805 | xprt_clear_connecting(xprt); | 1868 | xprt_clear_connecting(xprt); |
1869 | xprt_wake_pending_tasks(xprt, status); | ||
1806 | } | 1870 | } |
1807 | 1871 | ||
1808 | /** | 1872 | /** |