summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Sowmini Varadhan <sowmini.varadhan@oracle.com> 2016-11-16 16:29:49 -0500
committer: David S. Miller <davem@davemloft.net> 2016-11-17 13:35:18 -0500
commit: 905dd4184e0732de41d6ee3c7b06e0cfdd9f0aad (patch)
tree: 4bc06e636fa2587cc040c64935b70f8fce0a61ac /net
parent: 315ca6d98ed3fd7abe235637c28dd2f9f0a77795 (diff)
RDS: TCP: Track peer's connection generation number
The RDS transport has to be able to distinguish between two types of failure events:

(a) when the transport fails (e.g., TCP connection reset) but the RDS socket/connection layer on both sides stays the same;
(b) when the peer's RDS layer itself resets (e.g., due to module reload or machine reboot at the peer).

In case (a) both sides must reconnect and continue the RDS messaging without any message loss or disruption to the message sequence numbers, and this is achieved by rds_send_path_reset().

In case (b) we should reset all rds_connection state to the new incarnation of the peer. Examples of state that needs to be reset are next expected rx sequence number from, or messages to be retransmitted to, the new incarnation of the peer.

To achieve this, the RDS handshake probe added as part of commit 5916e2c1554f ("RDS: TCP: Enable multipath RDS for TCP") is enhanced so that sender and receiver of the RDS ping-probe will add a generation number as part of the RDS_EXTHDR_GEN_NUM extension header. Each peer stores local and remote generation numbers as part of each rds_connection. Changes in generation number will be detected via incoming handshake probe ping request or response and will allow the receiver to reset rds_connection state.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/rds/af_rds.c 4
-rw-r--r-- net/rds/connection.c 2
-rw-r--r-- net/rds/message.c 1
-rw-r--r-- net/rds/rds.h 8
-rw-r--r-- net/rds/recv.c 36
-rw-r--r-- net/rds/send.c 9
6 files changed, 57 insertions, 3 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 6beaeb1138f3..2ac1e6194be3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -605,10 +605,14 @@ static void rds_exit(void)
605} 605}
606module_exit(rds_exit); 606module_exit(rds_exit);
607 607
608u32 rds_gen_num;
609
608static int rds_init(void) 610static int rds_init(void)
609{ 611{
610 int ret; 612 int ret;
611 613
614 net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
615
612 ret = rds_bind_lock_init(); 616 ret = rds_bind_lock_init();
613 if (ret) 617 if (ret)
614 goto out; 618 goto out;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 13f459dad4ef..b86e188bde32 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -269,6 +269,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
269 kmem_cache_free(rds_conn_slab, conn); 269 kmem_cache_free(rds_conn_slab, conn);
270 conn = found; 270 conn = found;
271 } else { 271 } else {
272 conn->c_my_gen_num = rds_gen_num;
273 conn->c_peer_gen_num = 0;
272 hlist_add_head_rcu(&conn->c_hash_node, head); 274 hlist_add_head_rcu(&conn->c_hash_node, head);
273 rds_cong_add_conn(conn); 275 rds_cong_add_conn(conn);
274 rds_conn_count++; 276 rds_conn_count++;
diff --git a/net/rds/message.c b/net/rds/message.c
index 6cb91061556a..49bfb512d808 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -42,6 +42,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
42[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), 42[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
43[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), 43[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
44[RDS_EXTHDR_NPATHS] = sizeof(u16), 44[RDS_EXTHDR_NPATHS] = sizeof(u16),
45[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
45}; 46};
46 47
47 48
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 4121e1862444..ebbf909b87ec 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -151,6 +151,9 @@ struct rds_connection {
151 151
152 struct rds_conn_path c_path[RDS_MPATH_WORKERS]; 152 struct rds_conn_path c_path[RDS_MPATH_WORKERS];
153 wait_queue_head_t c_hs_waitq; /* handshake waitq */ 153 wait_queue_head_t c_hs_waitq; /* handshake waitq */
154
155 u32 c_my_gen_num;
156 u32 c_peer_gen_num;
154}; 157};
155 158
156static inline 159static inline
@@ -243,7 +246,8 @@ struct rds_ext_header_rdma_dest {
243/* Extension header announcing number of paths. 246/* Extension header announcing number of paths.
244 * Implicit length = 2 bytes. 247 * Implicit length = 2 bytes.
245 */ 248 */
246#define RDS_EXTHDR_NPATHS 4 249#define RDS_EXTHDR_NPATHS 5
250#define RDS_EXTHDR_GEN_NUM 6
247 251
248#define __RDS_EXTHDR_MAX 16 /* for now */ 252#define __RDS_EXTHDR_MAX 16 /* for now */
249 253
@@ -338,6 +342,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
338#define RDS_MSG_RETRANSMITTED 5 342#define RDS_MSG_RETRANSMITTED 5
339#define RDS_MSG_MAPPED 6 343#define RDS_MSG_MAPPED 6
340#define RDS_MSG_PAGEVEC 7 344#define RDS_MSG_PAGEVEC 7
345#define RDS_MSG_FLUSH 8
341 346
342struct rds_message { 347struct rds_message {
343 atomic_t m_refcount; 348 atomic_t m_refcount;
@@ -664,6 +669,7 @@ void rds_cong_exit(void);
664struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 669struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
665 670
666/* conn.c */ 671/* conn.c */
672extern u32 rds_gen_num;
667int rds_conn_init(void); 673int rds_conn_init(void);
668void rds_conn_exit(void); 674void rds_conn_exit(void);
669struct rds_connection *rds_conn_create(struct net *net, 675struct rds_connection *rds_conn_create(struct net *net,
diff --git a/net/rds/recv.c b/net/rds/recv.c
index cbfabdf3ff48..9d0666e5fe35 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -120,6 +120,36 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
120 /* do nothing if no change in cong state */ 120 /* do nothing if no change in cong state */
121} 121}
122 122
123static void rds_conn_peer_gen_update(struct rds_connection *conn,
124 u32 peer_gen_num)
125{
126 int i;
127 struct rds_message *rm, *tmp;
128 unsigned long flags;
129
130 WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP);
131 if (peer_gen_num != 0) {
132 if (conn->c_peer_gen_num != 0 &&
133 peer_gen_num != conn->c_peer_gen_num) {
134 for (i = 0; i < RDS_MPATH_WORKERS; i++) {
135 struct rds_conn_path *cp;
136
137 cp = &conn->c_path[i];
138 spin_lock_irqsave(&cp->cp_lock, flags);
139 cp->cp_next_tx_seq = 1;
140 cp->cp_next_rx_seq = 0;
141 list_for_each_entry_safe(rm, tmp,
142 &cp->cp_retrans,
143 m_conn_item) {
144 set_bit(RDS_MSG_FLUSH, &rm->m_flags);
145 }
146 spin_unlock_irqrestore(&cp->cp_lock, flags);
147 }
148 }
149 conn->c_peer_gen_num = peer_gen_num;
150 }
151}
152
123/* 153/*
124 * Process all extension headers that come with this message. 154 * Process all extension headers that come with this message.
125 */ 155 */
@@ -163,7 +193,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
163 union { 193 union {
164 struct rds_ext_header_version version; 194 struct rds_ext_header_version version;
165 u16 rds_npaths; 195 u16 rds_npaths;
196 u32 rds_gen_num;
166 } buffer; 197 } buffer;
198 u32 new_peer_gen_num = 0;
167 199
168 while (1) { 200 while (1) {
169 len = sizeof(buffer); 201 len = sizeof(buffer);
@@ -176,6 +208,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
176 conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, 208 conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
177 buffer.rds_npaths); 209 buffer.rds_npaths);
178 break; 210 break;
211 case RDS_EXTHDR_GEN_NUM:
212 new_peer_gen_num = buffer.rds_gen_num;
213 break;
179 default: 214 default:
180 pr_warn_ratelimited("ignoring unknown exthdr type " 215 pr_warn_ratelimited("ignoring unknown exthdr type "
181 "0x%x\n", type); 216 "0x%x\n", type);
@@ -183,6 +218,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
183 } 218 }
184 /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ 219 /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
185 conn->c_npaths = max_t(int, conn->c_npaths, 1); 220 conn->c_npaths = max_t(int, conn->c_npaths, 1);
221 rds_conn_peer_gen_update(conn, new_peer_gen_num);
186} 222}
187 223
188/* rds_start_mprds() will synchronously start multiple paths when appropriate. 224/* rds_start_mprds() will synchronously start multiple paths when appropriate.
diff --git a/net/rds/send.c b/net/rds/send.c
index 896626b9a0ef..77c8c6e613ad 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -259,8 +259,9 @@ restart:
259 * connection. 259 * connection.
260 * Therefore, we never retransmit messages with RDMA ops. 260 * Therefore, we never retransmit messages with RDMA ops.
261 */ 261 */
262 if (rm->rdma.op_active && 262 if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) ||
263 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 263 (rm->rdma.op_active &&
264 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) {
264 spin_lock_irqsave(&cp->cp_lock, flags); 265 spin_lock_irqsave(&cp->cp_lock, flags);
265 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 266 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
266 list_move(&rm->m_conn_item, &to_be_dropped); 267 list_move(&rm->m_conn_item, &to_be_dropped);
@@ -1209,6 +1210,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
1209 rds_message_add_extension(&rm->m_inc.i_hdr, 1210 rds_message_add_extension(&rm->m_inc.i_hdr,
1210 RDS_EXTHDR_NPATHS, &npaths, 1211 RDS_EXTHDR_NPATHS, &npaths,
1211 sizeof(npaths)); 1212 sizeof(npaths));
1213 rds_message_add_extension(&rm->m_inc.i_hdr,
1214 RDS_EXTHDR_GEN_NUM,
1215 &cp->cp_conn->c_my_gen_num,
1216 sizeof(u32));
1212 } 1217 }
1213 spin_unlock_irqrestore(&cp->cp_lock, flags); 1218 spin_unlock_irqrestore(&cp->cp_lock, flags);
1214 1219