aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2009-03-11 14:38:03 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2009-03-19 15:17:34 -0400
commit7d1e8255cf959fba7ee2317550dfde39f0b936ae (patch)
tree0a046b83e5ac29501849c34408059480d4c210be
parent5e3771ce2d6a69e10fcc870cdf226d121d868491 (diff)
SUNRPC: Add the equivalent of the linger and linger2 timeouts to RPC sockets
This fixes a regression against FreeBSD servers as reported by Tomas Kasparek. Apparently when using RPC over a TCP socket, the FreeBSD servers don't ever react to the client closing the socket, and so commit e06799f958bf7f9f8fae15f0c6f519953fb0257c (SUNRPC: Use shutdown() instead of close() when disconnecting a TCP socket) causes the setup to hang forever whenever the client attempts to close and then reconnect. We break the deadlock by adding a 'linger2' style timeout to the socket, after which the client will abort the connection using a TCP 'RST'. The default timeout is set to 15 seconds. A subsequent patch will put it under user control by means of a sysctl. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--include/linux/sunrpc/xprt.h1
-rw-r--r--net/sunrpc/xprtsock.c98
2 files changed, 82 insertions, 17 deletions
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 2b0d960603b9..1758d9f5b5c3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
260#define XPRT_BOUND (4) 260#define XPRT_BOUND (4)
261#define XPRT_BINDING (5) 261#define XPRT_BINDING (5)
262#define XPRT_CLOSING (6) 262#define XPRT_CLOSING (6)
263#define XPRT_CONNECTION_ABORT (7)
263 264
264static inline void xprt_set_connected(struct rpc_xprt *xprt) 265static inline void xprt_set_connected(struct rpc_xprt *xprt)
265{ 266{
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2e070679ab4a..b51f58b95c39 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -49,6 +49,8 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
51 51
52#define XS_TCP_LINGER_TO (15U * HZ)
53
52/* 54/*
53 * We can register our own files under /proc/sys/sunrpc by 55 * We can register our own files under /proc/sys/sunrpc by
54 * calling register_sysctl_table() again. The files in that 56 * calling register_sysctl_table() again. The files in that
@@ -806,6 +808,7 @@ static void xs_close(struct rpc_xprt *xprt)
806 xs_reset_transport(transport); 808 xs_reset_transport(transport);
807 809
808 smp_mb__before_clear_bit(); 810 smp_mb__before_clear_bit();
811 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
809 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 812 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
810 clear_bit(XPRT_CLOSING, &xprt->state); 813 clear_bit(XPRT_CLOSING, &xprt->state);
811 smp_mb__after_clear_bit(); 814 smp_mb__after_clear_bit();
@@ -1133,6 +1136,47 @@ out:
1133 read_unlock(&sk->sk_callback_lock); 1136 read_unlock(&sk->sk_callback_lock);
1134} 1137}
1135 1138
1139/*
1140 * Do the equivalent of linger/linger2 handling for dealing with
1141 * broken servers that don't close the socket in a timely
1142 * fashion
1143 */
1144static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
1145 unsigned long timeout)
1146{
1147 struct sock_xprt *transport;
1148
1149 if (xprt_test_and_set_connecting(xprt))
1150 return;
1151 set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1152 transport = container_of(xprt, struct sock_xprt, xprt);
1153 queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
1154 timeout);
1155}
1156
1157static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
1158{
1159 struct sock_xprt *transport;
1160
1161 transport = container_of(xprt, struct sock_xprt, xprt);
1162
1163 if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
1164 !cancel_delayed_work(&transport->connect_worker))
1165 return;
1166 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1167 xprt_clear_connecting(xprt);
1168}
1169
1170static void xs_sock_mark_closed(struct rpc_xprt *xprt)
1171{
1172 smp_mb__before_clear_bit();
1173 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1174 clear_bit(XPRT_CLOSING, &xprt->state);
1175 smp_mb__after_clear_bit();
1176 /* Mark transport as closed and wake up all pending tasks */
1177 xprt_disconnect_done(xprt);
1178}
1179
1136/** 1180/**
1137 * xs_tcp_state_change - callback to handle TCP socket state changes 1181 * xs_tcp_state_change - callback to handle TCP socket state changes
1138 * @sk: socket whose state has changed 1182 * @sk: socket whose state has changed
@@ -1178,6 +1222,7 @@ static void xs_tcp_state_change(struct sock *sk)
1178 clear_bit(XPRT_CONNECTED, &xprt->state); 1222 clear_bit(XPRT_CONNECTED, &xprt->state);
1179 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1223 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1180 smp_mb__after_clear_bit(); 1224 smp_mb__after_clear_bit();
1225 xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO);
1181 break; 1226 break;
1182 case TCP_CLOSE_WAIT: 1227 case TCP_CLOSE_WAIT:
1183 /* The server initiated a shutdown of the socket */ 1228 /* The server initiated a shutdown of the socket */
@@ -1194,17 +1239,14 @@ static void xs_tcp_state_change(struct sock *sk)
1194 break; 1239 break;
1195 case TCP_LAST_ACK: 1240 case TCP_LAST_ACK:
1196 set_bit(XPRT_CLOSING, &xprt->state); 1241 set_bit(XPRT_CLOSING, &xprt->state);
1242 xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO);
1197 smp_mb__before_clear_bit(); 1243 smp_mb__before_clear_bit();
1198 clear_bit(XPRT_CONNECTED, &xprt->state); 1244 clear_bit(XPRT_CONNECTED, &xprt->state);
1199 smp_mb__after_clear_bit(); 1245 smp_mb__after_clear_bit();
1200 break; 1246 break;
1201 case TCP_CLOSE: 1247 case TCP_CLOSE:
1202 smp_mb__before_clear_bit(); 1248 xs_tcp_cancel_linger_timeout(xprt);
1203 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1249 xs_sock_mark_closed(xprt);
1204 clear_bit(XPRT_CLOSING, &xprt->state);
1205 smp_mb__after_clear_bit();
1206 /* Mark transport as closed and wake up all pending tasks */
1207 xprt_disconnect_done(xprt);
1208 } 1250 }
1209 out: 1251 out:
1210 read_unlock(&sk->sk_callback_lock); 1252 read_unlock(&sk->sk_callback_lock);
@@ -1562,8 +1604,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1562 xs_udp_finish_connecting(xprt, sock); 1604 xs_udp_finish_connecting(xprt, sock);
1563 status = 0; 1605 status = 0;
1564out: 1606out:
1565 xprt_wake_pending_tasks(xprt, status);
1566 xprt_clear_connecting(xprt); 1607 xprt_clear_connecting(xprt);
1608 xprt_wake_pending_tasks(xprt, status);
1567} 1609}
1568 1610
1569/** 1611/**
@@ -1604,8 +1646,8 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1604 xs_udp_finish_connecting(xprt, sock); 1646 xs_udp_finish_connecting(xprt, sock);
1605 status = 0; 1647 status = 0;
1606out: 1648out:
1607 xprt_wake_pending_tasks(xprt, status);
1608 xprt_clear_connecting(xprt); 1649 xprt_clear_connecting(xprt);
1650 xprt_wake_pending_tasks(xprt, status);
1609} 1651}
1610 1652
1611/* 1653/*
@@ -1626,7 +1668,9 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
1626 memset(&any, 0, sizeof(any)); 1668 memset(&any, 0, sizeof(any));
1627 any.sa_family = AF_UNSPEC; 1669 any.sa_family = AF_UNSPEC;
1628 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1670 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1629 if (result) 1671 if (!result)
1672 xs_sock_mark_closed(xprt);
1673 else
1630 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1674 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1631 result); 1675 result);
1632} 1676}
@@ -1702,6 +1746,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1702 goto out; 1746 goto out;
1703 1747
1704 if (!sock) { 1748 if (!sock) {
1749 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1705 /* start from scratch */ 1750 /* start from scratch */
1706 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1751 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
1707 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1752 dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
@@ -1713,10 +1758,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1713 sock_release(sock); 1758 sock_release(sock);
1714 goto out; 1759 goto out;
1715 } 1760 }
1716 } else 1761 } else {
1762 int abort_and_exit;
1763
1764 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1765 &xprt->state);
1717 /* "close" the socket, preserving the local port */ 1766 /* "close" the socket, preserving the local port */
1718 xs_tcp_reuse_connection(xprt, transport); 1767 xs_tcp_reuse_connection(xprt, transport);
1719 1768
1769 if (abort_and_exit)
1770 goto out_eagain;
1771 }
1772
1720 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1773 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1721 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1774 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
1722 1775
@@ -1732,17 +1785,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1732 case 0: 1785 case 0:
1733 case -EINPROGRESS: 1786 case -EINPROGRESS:
1734 case -EALREADY: 1787 case -EALREADY:
1735 goto out_clear; 1788 xprt_clear_connecting(xprt);
1789 return;
1736 } 1790 }
1737 /* get rid of existing socket, and retry */ 1791 /* get rid of existing socket, and retry */
1738 xs_tcp_shutdown(xprt); 1792 xs_tcp_shutdown(xprt);
1739 printk("%s: connect returned unhandled error %d\n", 1793 printk("%s: connect returned unhandled error %d\n",
1740 __func__, status); 1794 __func__, status);
1795out_eagain:
1741 status = -EAGAIN; 1796 status = -EAGAIN;
1742out: 1797out:
1743 xprt_wake_pending_tasks(xprt, status);
1744out_clear:
1745 xprt_clear_connecting(xprt); 1798 xprt_clear_connecting(xprt);
1799 xprt_wake_pending_tasks(xprt, status);
1746} 1800}
1747 1801
1748/** 1802/**
@@ -1763,6 +1817,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
1763 goto out; 1817 goto out;
1764 1818
1765 if (!sock) { 1819 if (!sock) {
1820 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1766 /* start from scratch */ 1821 /* start from scratch */
1767 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1822 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
1768 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1823 dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
@@ -1774,10 +1829,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
1774 sock_release(sock); 1829 sock_release(sock);
1775 goto out; 1830 goto out;
1776 } 1831 }
1777 } else 1832 } else {
1833 int abort_and_exit;
1834
1835 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1836 &xprt->state);
1778 /* "close" the socket, preserving the local port */ 1837 /* "close" the socket, preserving the local port */
1779 xs_tcp_reuse_connection(xprt, transport); 1838 xs_tcp_reuse_connection(xprt, transport);
1780 1839
1840 if (abort_and_exit)
1841 goto out_eagain;
1842 }
1843
1781 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1844 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1782 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1845 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
1783 1846
@@ -1792,17 +1855,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
1792 case 0: 1855 case 0:
1793 case -EINPROGRESS: 1856 case -EINPROGRESS:
1794 case -EALREADY: 1857 case -EALREADY:
1795 goto out_clear; 1858 xprt_clear_connecting(xprt);
1859 return;
1796 } 1860 }
1797 /* get rid of existing socket, and retry */ 1861 /* get rid of existing socket, and retry */
1798 xs_tcp_shutdown(xprt); 1862 xs_tcp_shutdown(xprt);
1799 printk("%s: connect returned unhandled error %d\n", 1863 printk("%s: connect returned unhandled error %d\n",
1800 __func__, status); 1864 __func__, status);
1865out_eagain:
1801 status = -EAGAIN; 1866 status = -EAGAIN;
1802out: 1867out:
1803 xprt_wake_pending_tasks(xprt, status);
1804out_clear:
1805 xprt_clear_connecting(xprt); 1868 xprt_clear_connecting(xprt);
1869 xprt_wake_pending_tasks(xprt, status);
1806} 1870}
1807 1871
1808/** 1872/**