aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds
diff options
context:
space:
mode:
authorSantosh Shilimkar <santosh.shilimkar@oracle.com>2016-07-05 01:35:15 -0400
committerSantosh Shilimkar <santosh.shilimkar@oracle.com>2017-01-02 17:02:59 -0500
commit3289025aedc018f8fd9d0e37fb9efa0c6d531ffa (patch)
tree251ddb4c6606d91809d98d40a22e18382025664e /net/rds
parentf9fb69adb6c7acca60977a4db5a5f95b8e66c041 (diff)
RDS: add receive message trace used by application
Socket option to tap receive path latency at various stages, in nanoseconds. It can be enabled on selected sockets using the SO_RDS_MSG_RXPATH_LATENCY socket option. RDS will return the data to the application with RDS_CMSG_RXPATH_LATENCY in a defined format. Scope is left to add more trace points in the future without needing to change the interface. Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Diffstat (limited to 'net/rds')
-rw-r--r--net/rds/af_rds.c28
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/rds.h10
-rw-r--r--net/rds/recv.c32
-rw-r--r--net/rds/tcp_recv.c5
5 files changed, 76 insertions, 3 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2ac1e6194be3..fd8217404162 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -298,6 +298,30 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
298 return 0; 298 return 0;
299} 299}
300 300
301static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
302 int optlen)
303{
304 struct rds_rx_trace_so trace;
305 int i;
306
307 if (optlen != sizeof(struct rds_rx_trace_so))
308 return -EFAULT;
309
310 if (copy_from_user(&trace, optval, sizeof(trace)))
311 return -EFAULT;
312
313 rs->rs_rx_traces = trace.rx_traces;
314 for (i = 0; i < rs->rs_rx_traces; i++) {
315 if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
316 rs->rs_rx_traces = 0;
317 return -EFAULT;
318 }
319 rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
320 }
321
322 return 0;
323}
324
301static int rds_setsockopt(struct socket *sock, int level, int optname, 325static int rds_setsockopt(struct socket *sock, int level, int optname,
302 char __user *optval, unsigned int optlen) 326 char __user *optval, unsigned int optlen)
303{ 327{
@@ -338,6 +362,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
338 ret = rds_enable_recvtstamp(sock->sk, optval, optlen); 362 ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
339 release_sock(sock->sk); 363 release_sock(sock->sk);
340 break; 364 break;
365 case SO_RDS_MSG_RXPATH_LATENCY:
366 ret = rds_recv_track_latency(rs, optval, optlen);
367 break;
341 default: 368 default:
342 ret = -ENOPROTOOPT; 369 ret = -ENOPROTOOPT;
343 } 370 }
@@ -484,6 +511,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
484 INIT_LIST_HEAD(&rs->rs_cong_list); 511 INIT_LIST_HEAD(&rs->rs_cong_list);
485 spin_lock_init(&rs->rs_rdma_lock); 512 spin_lock_init(&rs->rs_rdma_lock);
486 rs->rs_rdma_keys = RB_ROOT; 513 rs->rs_rdma_keys = RB_ROOT;
514 rs->rs_rx_traces = 0;
487 515
488 spin_lock_bh(&rds_sock_lock); 516 spin_lock_bh(&rds_sock_lock);
489 list_add_tail(&rs->rs_item, &rds_sock_list); 517 list_add_tail(&rs->rs_item, &rds_sock_list);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 4b0f12679219..e10624aa6959 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -911,8 +911,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
911 ic->i_ibinc = ibinc; 911 ic->i_ibinc = ibinc;
912 912
913 hdr = &ibinc->ii_inc.i_hdr; 913 hdr = &ibinc->ii_inc.i_hdr;
914 ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
915 local_clock();
914 memcpy(hdr, ihdr, sizeof(*hdr)); 916 memcpy(hdr, ihdr, sizeof(*hdr));
915 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); 917 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
918 ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
919 local_clock();
916 920
917 rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, 921 rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
918 ic->i_recv_data_rem, hdr->h_flags); 922 ic->i_recv_data_rem, hdr->h_flags);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index f713194e4620..07fff73dd4f3 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -253,6 +253,11 @@ struct rds_ext_header_rdma_dest {
253#define RDS_EXTHDR_GEN_NUM 6 253#define RDS_EXTHDR_GEN_NUM 6
254 254
255#define __RDS_EXTHDR_MAX 16 /* for now */ 255#define __RDS_EXTHDR_MAX 16 /* for now */
256#define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1)
257#define RDS_MSG_RX_HDR 0
258#define RDS_MSG_RX_START 1
259#define RDS_MSG_RX_END 2
260#define RDS_MSG_RX_CMSG 3
256 261
257struct rds_incoming { 262struct rds_incoming {
258 atomic_t i_refcount; 263 atomic_t i_refcount;
@@ -265,6 +270,7 @@ struct rds_incoming {
265 270
266 rds_rdma_cookie_t i_rdma_cookie; 271 rds_rdma_cookie_t i_rdma_cookie;
267 struct timeval i_rx_tstamp; 272 struct timeval i_rx_tstamp;
273 u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
268}; 274};
269 275
270struct rds_mr { 276struct rds_mr {
@@ -575,6 +581,10 @@ struct rds_sock {
575 unsigned char rs_recverr, 581 unsigned char rs_recverr,
576 rs_cong_monitor; 582 rs_cong_monitor;
577 u32 rs_hash_initval; 583 u32 rs_hash_initval;
584
585 /* Socket receive path trace points*/
586 u8 rs_rx_traces;
587 u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
578}; 588};
579 589
580static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) 590static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
diff --git a/net/rds/recv.c b/net/rds/recv.c
index ba19eeeae85a..8b7e7b7f2c2d 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -43,6 +43,8 @@
43void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 43void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
44 __be32 saddr) 44 __be32 saddr)
45{ 45{
46 int i;
47
46 atomic_set(&inc->i_refcount, 1); 48 atomic_set(&inc->i_refcount, 1);
47 INIT_LIST_HEAD(&inc->i_item); 49 INIT_LIST_HEAD(&inc->i_item);
48 inc->i_conn = conn; 50 inc->i_conn = conn;
@@ -50,6 +52,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
50 inc->i_rdma_cookie = 0; 52 inc->i_rdma_cookie = 0;
51 inc->i_rx_tstamp.tv_sec = 0; 53 inc->i_rx_tstamp.tv_sec = 0;
52 inc->i_rx_tstamp.tv_usec = 0; 54 inc->i_rx_tstamp.tv_usec = 0;
55
56 for (i = 0; i < RDS_RX_MAX_TRACES; i++)
57 inc->i_rx_lat_trace[i] = 0;
53} 58}
54EXPORT_SYMBOL_GPL(rds_inc_init); 59EXPORT_SYMBOL_GPL(rds_inc_init);
55 60
@@ -373,6 +378,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
373 if (sock_flag(sk, SOCK_RCVTSTAMP)) 378 if (sock_flag(sk, SOCK_RCVTSTAMP))
374 do_gettimeofday(&inc->i_rx_tstamp); 379 do_gettimeofday(&inc->i_rx_tstamp);
375 rds_inc_addref(inc); 380 rds_inc_addref(inc);
381 inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
376 list_add_tail(&inc->i_item, &rs->rs_recv_queue); 382 list_add_tail(&inc->i_item, &rs->rs_recv_queue);
377 __rds_wake_sk_sleep(sk); 383 __rds_wake_sk_sleep(sk);
378 } else { 384 } else {
@@ -534,7 +540,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
534 ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, 540 ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
535 sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); 541 sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
536 if (ret) 542 if (ret)
537 return ret; 543 goto out;
538 } 544 }
539 545
540 if ((inc->i_rx_tstamp.tv_sec != 0) && 546 if ((inc->i_rx_tstamp.tv_sec != 0) &&
@@ -543,10 +549,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
543 sizeof(struct timeval), 549 sizeof(struct timeval),
544 &inc->i_rx_tstamp); 550 &inc->i_rx_tstamp);
545 if (ret) 551 if (ret)
546 return ret; 552 goto out;
547 } 553 }
548 554
549 return 0; 555 if (rs->rs_rx_traces) {
556 struct rds_cmsg_rx_trace t;
557 int i, j;
558
559 inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
560 t.rx_traces = rs->rs_rx_traces;
561 for (i = 0; i < rs->rs_rx_traces; i++) {
562 j = rs->rs_rx_trace[i];
563 t.rx_trace_pos[i] = j;
564 t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] -
565 inc->i_rx_lat_trace[j];
566 }
567
568 ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY,
569 sizeof(t), &t);
570 if (ret)
571 goto out;
572 }
573
574out:
575 return ret;
550} 576}
551 577
552int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 578int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index ad4892e97f91..e006ef8e6d40 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -180,6 +180,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
180 rdsdebug("alloced tinc %p\n", tinc); 180 rdsdebug("alloced tinc %p\n", tinc);
181 rds_inc_path_init(&tinc->ti_inc, cp, 181 rds_inc_path_init(&tinc->ti_inc, cp,
182 cp->cp_conn->c_faddr); 182 cp->cp_conn->c_faddr);
183 tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
184 local_clock();
185
183 /* 186 /*
184 * XXX * we might be able to use the __ variants when 187 * XXX * we might be able to use the __ variants when
185 * we've already serialized at a higher level. 188 * we've already serialized at a higher level.
@@ -204,6 +207,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
204 /* could be 0 for a 0 len message */ 207 /* could be 0 for a 0 len message */
205 tc->t_tinc_data_rem = 208 tc->t_tinc_data_rem =
206 be32_to_cpu(tinc->ti_inc.i_hdr.h_len); 209 be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
210 tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
211 local_clock();
207 } 212 }
208 } 213 }
209 214