diff options
author | Andy Grover <andy.grover@oracle.com> | 2009-08-21 08:28:31 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-08-23 22:13:02 -0400 |
commit | 70041088e3b976627ba9a183b812f39ef8a9ba0e (patch) | |
tree | bad7b11763d7b02b185bd705fe5ed292397cbc7a /net/rds/tcp_connect.c | |
parent | 7d6fd5e7e97a2188d56441e4e96494c21c5994a7 (diff) |
RDS: Add TCP transport to RDS
This code allows RDS to be tunneled over a TCP connection.
RDMA operations are disabled when using TCP transport,
but this frees RDS from the IB/RDMA stack dependency, and allows
it to be used with standard Ethernet adapters, or in a VM.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/tcp_connect.c')
-rw-r--r-- | net/rds/tcp_connect.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c new file mode 100644 index 000000000000..211522f9a9a2 --- /dev/null +++ b/net/rds/tcp_connect.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <net/tcp.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "tcp.h" | ||
39 | |||
40 | void rds_tcp_state_change(struct sock *sk) | ||
41 | { | ||
42 | void (*state_change)(struct sock *sk); | ||
43 | struct rds_connection *conn; | ||
44 | struct rds_tcp_connection *tc; | ||
45 | |||
46 | read_lock(&sk->sk_callback_lock); | ||
47 | conn = sk->sk_user_data; | ||
48 | if (conn == NULL) { | ||
49 | state_change = sk->sk_state_change; | ||
50 | goto out; | ||
51 | } | ||
52 | tc = conn->c_transport_data; | ||
53 | state_change = tc->t_orig_state_change; | ||
54 | |||
55 | rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); | ||
56 | |||
57 | switch(sk->sk_state) { | ||
58 | /* ignore connecting sockets as they make progress */ | ||
59 | case TCP_SYN_SENT: | ||
60 | case TCP_SYN_RECV: | ||
61 | break; | ||
62 | case TCP_ESTABLISHED: | ||
63 | rds_connect_complete(conn); | ||
64 | break; | ||
65 | case TCP_CLOSE: | ||
66 | rds_conn_drop(conn); | ||
67 | default: | ||
68 | break; | ||
69 | } | ||
70 | out: | ||
71 | read_unlock(&sk->sk_callback_lock); | ||
72 | state_change(sk); | ||
73 | } | ||
74 | |||
75 | int rds_tcp_conn_connect(struct rds_connection *conn) | ||
76 | { | ||
77 | struct socket *sock = NULL; | ||
78 | struct sockaddr_in src, dest; | ||
79 | int ret; | ||
80 | |||
81 | ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); | ||
82 | if (ret < 0) | ||
83 | goto out; | ||
84 | |||
85 | rds_tcp_tune(sock); | ||
86 | |||
87 | src.sin_family = AF_INET; | ||
88 | src.sin_addr.s_addr = (__force u32)conn->c_laddr; | ||
89 | src.sin_port = (__force u16)htons(0); | ||
90 | |||
91 | ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src)); | ||
92 | if (ret) { | ||
93 | rdsdebug("bind failed with %d at address %u.%u.%u.%u\n", | ||
94 | ret, NIPQUAD(conn->c_laddr)); | ||
95 | goto out; | ||
96 | } | ||
97 | |||
98 | dest.sin_family = AF_INET; | ||
99 | dest.sin_addr.s_addr = (__force u32)conn->c_faddr; | ||
100 | dest.sin_port = (__force u16)htons(RDS_TCP_PORT); | ||
101 | |||
102 | /* | ||
103 | * once we call connect() we can start getting callbacks and they | ||
104 | * own the socket | ||
105 | */ | ||
106 | rds_tcp_set_callbacks(sock, conn); | ||
107 | ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), | ||
108 | O_NONBLOCK); | ||
109 | sock = NULL; | ||
110 | |||
111 | rdsdebug("connect to address %u.%u.%u.%u returned %d\n", | ||
112 | NIPQUAD(conn->c_faddr), ret); | ||
113 | if (ret == -EINPROGRESS) | ||
114 | ret = 0; | ||
115 | |||
116 | out: | ||
117 | if (sock) | ||
118 | sock_release(sock); | ||
119 | return ret; | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * Before killing the tcp socket this needs to serialize with callbacks. The | ||
124 | * caller has already grabbed the sending sem so we're serialized with other | ||
125 | * senders. | ||
126 | * | ||
127 | * TCP calls the callbacks with the sock lock so we hold it while we reset the | ||
128 | * callbacks to those set by TCP. Our callbacks won't execute again once we | ||
129 | * hold the sock lock. | ||
130 | */ | ||
131 | void rds_tcp_conn_shutdown(struct rds_connection *conn) | ||
132 | { | ||
133 | struct rds_tcp_connection *tc = conn->c_transport_data; | ||
134 | struct socket *sock = tc->t_sock; | ||
135 | |||
136 | rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock); | ||
137 | |||
138 | if (sock) { | ||
139 | sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); | ||
140 | lock_sock(sock->sk); | ||
141 | rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ | ||
142 | |||
143 | release_sock(sock->sk); | ||
144 | sock_release(sock); | ||
145 | }; | ||
146 | |||
147 | if (tc->t_tinc) { | ||
148 | rds_inc_put(&tc->t_tinc->ti_inc); | ||
149 | tc->t_tinc = NULL; | ||
150 | } | ||
151 | tc->t_tinc_hdr_rem = sizeof(struct rds_header); | ||
152 | tc->t_tinc_data_rem = 0; | ||
153 | } | ||