diff options
| author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2009-03-11 14:38:03 -0400 |
|---|---|---|
| committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2009-03-19 15:17:34 -0400 |
| commit | 7d1e8255cf959fba7ee2317550dfde39f0b936ae (patch) | |
| tree | 0a046b83e5ac29501849c34408059480d4c210be | |
| parent | 5e3771ce2d6a69e10fcc870cdf226d121d868491 (diff) | |
SUNRPC: Add the equivalent of the linger and linger2 timeouts to RPC sockets
This fixes a regression against FreeBSD servers as reported by Tomas
Kasparek. Apparently when using RPC over a TCP socket, the FreeBSD servers
don't ever react to the client closing the socket, and so commit
e06799f958bf7f9f8fae15f0c6f519953fb0257c (SUNRPC: Use shutdown() instead of
close() when disconnecting a TCP socket) causes the setup to hang forever
whenever the client attempts to close and then reconnect.
We break the deadlock by adding a 'linger2' style timeout to the socket,
after which the client will abort the connection using a TCP 'RST'.
The default timeout is set to 15 seconds. A subsequent patch will put it
under user control by means of a sysctl.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
| -rw-r--r-- | include/linux/sunrpc/xprt.h | 1 | ||||
| -rw-r--r-- | net/sunrpc/xprtsock.c | 98 |
2 files changed, 82 insertions, 17 deletions
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2b0d960603b9..1758d9f5b5c3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
| @@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); | |||
| 260 | #define XPRT_BOUND (4) | 260 | #define XPRT_BOUND (4) |
| 261 | #define XPRT_BINDING (5) | 261 | #define XPRT_BINDING (5) |
| 262 | #define XPRT_CLOSING (6) | 262 | #define XPRT_CLOSING (6) |
| 263 | #define XPRT_CONNECTION_ABORT (7) | ||
| 263 | 264 | ||
| 264 | static inline void xprt_set_connected(struct rpc_xprt *xprt) | 265 | static inline void xprt_set_connected(struct rpc_xprt *xprt) |
| 265 | { | 266 | { |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2e070679ab4a..b51f58b95c39 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
| @@ -49,6 +49,8 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | |||
| 49 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | 49 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; |
| 50 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | 50 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; |
| 51 | 51 | ||
| 52 | #define XS_TCP_LINGER_TO (15U * HZ) | ||
| 53 | |||
| 52 | /* | 54 | /* |
| 53 | * We can register our own files under /proc/sys/sunrpc by | 55 | * We can register our own files under /proc/sys/sunrpc by |
| 54 | * calling register_sysctl_table() again. The files in that | 56 | * calling register_sysctl_table() again. The files in that |
| @@ -806,6 +808,7 @@ static void xs_close(struct rpc_xprt *xprt) | |||
| 806 | xs_reset_transport(transport); | 808 | xs_reset_transport(transport); |
| 807 | 809 | ||
| 808 | smp_mb__before_clear_bit(); | 810 | smp_mb__before_clear_bit(); |
| 811 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
| 809 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 812 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); |
| 810 | clear_bit(XPRT_CLOSING, &xprt->state); | 813 | clear_bit(XPRT_CLOSING, &xprt->state); |
| 811 | smp_mb__after_clear_bit(); | 814 | smp_mb__after_clear_bit(); |
| @@ -1133,6 +1136,47 @@ out: | |||
| 1133 | read_unlock(&sk->sk_callback_lock); | 1136 | read_unlock(&sk->sk_callback_lock); |
| 1134 | } | 1137 | } |
| 1135 | 1138 | ||
| 1139 | /* | ||
| 1140 | * Do the equivalent of linger/linger2 handling for dealing with | ||
| 1141 | * broken servers that don't close the socket in a timely | ||
| 1142 | * fashion | ||
| 1143 | */ | ||
| 1144 | static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, | ||
| 1145 | unsigned long timeout) | ||
| 1146 | { | ||
| 1147 | struct sock_xprt *transport; | ||
| 1148 | |||
| 1149 | if (xprt_test_and_set_connecting(xprt)) | ||
| 1150 | return; | ||
| 1151 | set_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
| 1152 | transport = container_of(xprt, struct sock_xprt, xprt); | ||
| 1153 | queue_delayed_work(rpciod_workqueue, &transport->connect_worker, | ||
| 1154 | timeout); | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) | ||
| 1158 | { | ||
| 1159 | struct sock_xprt *transport; | ||
| 1160 | |||
| 1161 | transport = container_of(xprt, struct sock_xprt, xprt); | ||
| 1162 | |||
| 1163 | if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || | ||
| 1164 | !cancel_delayed_work(&transport->connect_worker)) | ||
| 1165 | return; | ||
| 1166 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
| 1167 | xprt_clear_connecting(xprt); | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | static void xs_sock_mark_closed(struct rpc_xprt *xprt) | ||
| 1171 | { | ||
| 1172 | smp_mb__before_clear_bit(); | ||
| 1173 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | ||
| 1174 | clear_bit(XPRT_CLOSING, &xprt->state); | ||
| 1175 | smp_mb__after_clear_bit(); | ||
| 1176 | /* Mark transport as closed and wake up all pending tasks */ | ||
| 1177 | xprt_disconnect_done(xprt); | ||
| 1178 | } | ||
| 1179 | |||
| 1136 | /** | 1180 | /** |
| 1137 | * xs_tcp_state_change - callback to handle TCP socket state changes | 1181 | * xs_tcp_state_change - callback to handle TCP socket state changes |
| 1138 | * @sk: socket whose state has changed | 1182 | * @sk: socket whose state has changed |
| @@ -1178,6 +1222,7 @@ static void xs_tcp_state_change(struct sock *sk) | |||
| 1178 | clear_bit(XPRT_CONNECTED, &xprt->state); | 1222 | clear_bit(XPRT_CONNECTED, &xprt->state); |
| 1179 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 1223 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); |
| 1180 | smp_mb__after_clear_bit(); | 1224 | smp_mb__after_clear_bit(); |
| 1225 | xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); | ||
| 1181 | break; | 1226 | break; |
| 1182 | case TCP_CLOSE_WAIT: | 1227 | case TCP_CLOSE_WAIT: |
| 1183 | /* The server initiated a shutdown of the socket */ | 1228 | /* The server initiated a shutdown of the socket */ |
| @@ -1194,17 +1239,14 @@ static void xs_tcp_state_change(struct sock *sk) | |||
| 1194 | break; | 1239 | break; |
| 1195 | case TCP_LAST_ACK: | 1240 | case TCP_LAST_ACK: |
| 1196 | set_bit(XPRT_CLOSING, &xprt->state); | 1241 | set_bit(XPRT_CLOSING, &xprt->state); |
| 1242 | xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); | ||
| 1197 | smp_mb__before_clear_bit(); | 1243 | smp_mb__before_clear_bit(); |
| 1198 | clear_bit(XPRT_CONNECTED, &xprt->state); | 1244 | clear_bit(XPRT_CONNECTED, &xprt->state); |
| 1199 | smp_mb__after_clear_bit(); | 1245 | smp_mb__after_clear_bit(); |
| 1200 | break; | 1246 | break; |
| 1201 | case TCP_CLOSE: | 1247 | case TCP_CLOSE: |
| 1202 | smp_mb__before_clear_bit(); | 1248 | xs_tcp_cancel_linger_timeout(xprt); |
| 1203 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 1249 | xs_sock_mark_closed(xprt); |
| 1204 | clear_bit(XPRT_CLOSING, &xprt->state); | ||
| 1205 | smp_mb__after_clear_bit(); | ||
| 1206 | /* Mark transport as closed and wake up all pending tasks */ | ||
| 1207 | xprt_disconnect_done(xprt); | ||
| 1208 | } | 1250 | } |
| 1209 | out: | 1251 | out: |
| 1210 | read_unlock(&sk->sk_callback_lock); | 1252 | read_unlock(&sk->sk_callback_lock); |
| @@ -1562,8 +1604,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) | |||
| 1562 | xs_udp_finish_connecting(xprt, sock); | 1604 | xs_udp_finish_connecting(xprt, sock); |
| 1563 | status = 0; | 1605 | status = 0; |
| 1564 | out: | 1606 | out: |
| 1565 | xprt_wake_pending_tasks(xprt, status); | ||
| 1566 | xprt_clear_connecting(xprt); | 1607 | xprt_clear_connecting(xprt); |
| 1608 | xprt_wake_pending_tasks(xprt, status); | ||
| 1567 | } | 1609 | } |
| 1568 | 1610 | ||
| 1569 | /** | 1611 | /** |
| @@ -1604,8 +1646,8 @@ static void xs_udp_connect_worker6(struct work_struct *work) | |||
| 1604 | xs_udp_finish_connecting(xprt, sock); | 1646 | xs_udp_finish_connecting(xprt, sock); |
| 1605 | status = 0; | 1647 | status = 0; |
| 1606 | out: | 1648 | out: |
| 1607 | xprt_wake_pending_tasks(xprt, status); | ||
| 1608 | xprt_clear_connecting(xprt); | 1649 | xprt_clear_connecting(xprt); |
| 1650 | xprt_wake_pending_tasks(xprt, status); | ||
| 1609 | } | 1651 | } |
| 1610 | 1652 | ||
| 1611 | /* | 1653 | /* |
| @@ -1626,7 +1668,9 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo | |||
| 1626 | memset(&any, 0, sizeof(any)); | 1668 | memset(&any, 0, sizeof(any)); |
| 1627 | any.sa_family = AF_UNSPEC; | 1669 | any.sa_family = AF_UNSPEC; |
| 1628 | result = kernel_connect(transport->sock, &any, sizeof(any), 0); | 1670 | result = kernel_connect(transport->sock, &any, sizeof(any), 0); |
| 1629 | if (result) | 1671 | if (!result) |
| 1672 | xs_sock_mark_closed(xprt); | ||
| 1673 | else | ||
| 1630 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | 1674 | dprintk("RPC: AF_UNSPEC connect return code %d\n", |
| 1631 | result); | 1675 | result); |
| 1632 | } | 1676 | } |
| @@ -1702,6 +1746,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) | |||
| 1702 | goto out; | 1746 | goto out; |
| 1703 | 1747 | ||
| 1704 | if (!sock) { | 1748 | if (!sock) { |
| 1749 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
| 1705 | /* start from scratch */ | 1750 | /* start from scratch */ |
| 1706 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | 1751 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { |
| 1707 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | 1752 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); |
| @@ -1713,10 +1758,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) | |||
| 1713 | sock_release(sock); | 1758 | sock_release(sock); |
| 1714 | goto out; | 1759 | goto out; |
| 1715 | } | 1760 | } |
| 1716 | } else | 1761 | } else { |
| 1762 | int abort_and_exit; | ||
| 1763 | |||
| 1764 | abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, | ||
| 1765 | &xprt->state); | ||
| 1717 | /* "close" the socket, preserving the local port */ | 1766 | /* "close" the socket, preserving the local port */ |
| 1718 | xs_tcp_reuse_connection(xprt, transport); | 1767 | xs_tcp_reuse_connection(xprt, transport); |
| 1719 | 1768 | ||
| 1769 | if (abort_and_exit) | ||
| 1770 | goto out_eagain; | ||
| 1771 | } | ||
| 1772 | |||
| 1720 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | 1773 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
| 1721 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | 1774 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
| 1722 | 1775 | ||
| @@ -1732,17 +1785,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) | |||
| 1732 | case 0: | 1785 | case 0: |
| 1733 | case -EINPROGRESS: | 1786 | case -EINPROGRESS: |
| 1734 | case -EALREADY: | 1787 | case -EALREADY: |
| 1735 | goto out_clear; | 1788 | xprt_clear_connecting(xprt); |
| 1789 | return; | ||
| 1736 | } | 1790 | } |
| 1737 | /* get rid of existing socket, and retry */ | 1791 | /* get rid of existing socket, and retry */ |
| 1738 | xs_tcp_shutdown(xprt); | 1792 | xs_tcp_shutdown(xprt); |
| 1739 | printk("%s: connect returned unhandled error %d\n", | 1793 | printk("%s: connect returned unhandled error %d\n", |
| 1740 | __func__, status); | 1794 | __func__, status); |
| 1795 | out_eagain: | ||
| 1741 | status = -EAGAIN; | 1796 | status = -EAGAIN; |
| 1742 | out: | 1797 | out: |
| 1743 | xprt_wake_pending_tasks(xprt, status); | ||
| 1744 | out_clear: | ||
| 1745 | xprt_clear_connecting(xprt); | 1798 | xprt_clear_connecting(xprt); |
| 1799 | xprt_wake_pending_tasks(xprt, status); | ||
| 1746 | } | 1800 | } |
| 1747 | 1801 | ||
| 1748 | /** | 1802 | /** |
| @@ -1763,6 +1817,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) | |||
| 1763 | goto out; | 1817 | goto out; |
| 1764 | 1818 | ||
| 1765 | if (!sock) { | 1819 | if (!sock) { |
| 1820 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | ||
| 1766 | /* start from scratch */ | 1821 | /* start from scratch */ |
| 1767 | if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | 1822 | if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { |
| 1768 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | 1823 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); |
| @@ -1774,10 +1829,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) | |||
| 1774 | sock_release(sock); | 1829 | sock_release(sock); |
| 1775 | goto out; | 1830 | goto out; |
| 1776 | } | 1831 | } |
| 1777 | } else | 1832 | } else { |
| 1833 | int abort_and_exit; | ||
| 1834 | |||
| 1835 | abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, | ||
| 1836 | &xprt->state); | ||
| 1778 | /* "close" the socket, preserving the local port */ | 1837 | /* "close" the socket, preserving the local port */ |
| 1779 | xs_tcp_reuse_connection(xprt, transport); | 1838 | xs_tcp_reuse_connection(xprt, transport); |
| 1780 | 1839 | ||
| 1840 | if (abort_and_exit) | ||
| 1841 | goto out_eagain; | ||
| 1842 | } | ||
| 1843 | |||
| 1781 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | 1844 | dprintk("RPC: worker connecting xprt %p to address: %s\n", |
| 1782 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); | 1845 | xprt, xprt->address_strings[RPC_DISPLAY_ALL]); |
| 1783 | 1846 | ||
| @@ -1792,17 +1855,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) | |||
| 1792 | case 0: | 1855 | case 0: |
| 1793 | case -EINPROGRESS: | 1856 | case -EINPROGRESS: |
| 1794 | case -EALREADY: | 1857 | case -EALREADY: |
| 1795 | goto out_clear; | 1858 | xprt_clear_connecting(xprt); |
| 1859 | return; | ||
| 1796 | } | 1860 | } |
| 1797 | /* get rid of existing socket, and retry */ | 1861 | /* get rid of existing socket, and retry */ |
| 1798 | xs_tcp_shutdown(xprt); | 1862 | xs_tcp_shutdown(xprt); |
| 1799 | printk("%s: connect returned unhandled error %d\n", | 1863 | printk("%s: connect returned unhandled error %d\n", |
| 1800 | __func__, status); | 1864 | __func__, status); |
| 1865 | out_eagain: | ||
| 1801 | status = -EAGAIN; | 1866 | status = -EAGAIN; |
| 1802 | out: | 1867 | out: |
| 1803 | xprt_wake_pending_tasks(xprt, status); | ||
| 1804 | out_clear: | ||
| 1805 | xprt_clear_connecting(xprt); | 1868 | xprt_clear_connecting(xprt); |
| 1869 | xprt_wake_pending_tasks(xprt, status); | ||
| 1806 | } | 1870 | } |
| 1807 | 1871 | ||
| 1808 | /** | 1872 | /** |
