diff options
author | Santosh Shilimkar <santosh.shilimkar@oracle.com> | 2016-07-05 01:35:15 -0400 |
---|---|---|
committer | Santosh Shilimkar <santosh.shilimkar@oracle.com> | 2017-01-02 17:02:59 -0500 |
commit | 3289025aedc018f8fd9d0e37fb9efa0c6d531ffa (patch) | |
tree | 251ddb4c6606d91809d98d40a22e18382025664e /net/rds | |
parent | f9fb69adb6c7acca60977a4db5a5f95b8e66c041 (diff) |
RDS: add receive message trace used by application
Add a socket option to tap receive path latency at various stages,
in nanoseconds. It can be enabled on selected sockets using the
SO_RDS_MSG_RXPATH_LATENCY socket option. RDS will return
the data to the application with RDS_CMSG_RXPATH_LATENCY in a defined
format. Room is left to add more trace points in the future
without needing to change the interface.
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/af_rds.c | 28 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 4 | ||||
-rw-r--r-- | net/rds/rds.h | 10 | ||||
-rw-r--r-- | net/rds/recv.c | 32 | ||||
-rw-r--r-- | net/rds/tcp_recv.c | 5 |
5 files changed, 76 insertions, 3 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 2ac1e6194be3..fd8217404162 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c | |||
@@ -298,6 +298,30 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, | |||
298 | return 0; | 298 | return 0; |
299 | } | 299 | } |
300 | 300 | ||
301 | static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, | ||
302 | int optlen) | ||
303 | { | ||
304 | struct rds_rx_trace_so trace; | ||
305 | int i; | ||
306 | |||
307 | if (optlen != sizeof(struct rds_rx_trace_so)) | ||
308 | return -EFAULT; | ||
309 | |||
310 | if (copy_from_user(&trace, optval, sizeof(trace))) | ||
311 | return -EFAULT; | ||
312 | |||
313 | rs->rs_rx_traces = trace.rx_traces; | ||
314 | for (i = 0; i < rs->rs_rx_traces; i++) { | ||
315 | if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { | ||
316 | rs->rs_rx_traces = 0; | ||
317 | return -EFAULT; | ||
318 | } | ||
319 | rs->rs_rx_trace[i] = trace.rx_trace_pos[i]; | ||
320 | } | ||
321 | |||
322 | return 0; | ||
323 | } | ||
324 | |||
301 | static int rds_setsockopt(struct socket *sock, int level, int optname, | 325 | static int rds_setsockopt(struct socket *sock, int level, int optname, |
302 | char __user *optval, unsigned int optlen) | 326 | char __user *optval, unsigned int optlen) |
303 | { | 327 | { |
@@ -338,6 +362,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, | |||
338 | ret = rds_enable_recvtstamp(sock->sk, optval, optlen); | 362 | ret = rds_enable_recvtstamp(sock->sk, optval, optlen); |
339 | release_sock(sock->sk); | 363 | release_sock(sock->sk); |
340 | break; | 364 | break; |
365 | case SO_RDS_MSG_RXPATH_LATENCY: | ||
366 | ret = rds_recv_track_latency(rs, optval, optlen); | ||
367 | break; | ||
341 | default: | 368 | default: |
342 | ret = -ENOPROTOOPT; | 369 | ret = -ENOPROTOOPT; |
343 | } | 370 | } |
@@ -484,6 +511,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) | |||
484 | INIT_LIST_HEAD(&rs->rs_cong_list); | 511 | INIT_LIST_HEAD(&rs->rs_cong_list); |
485 | spin_lock_init(&rs->rs_rdma_lock); | 512 | spin_lock_init(&rs->rs_rdma_lock); |
486 | rs->rs_rdma_keys = RB_ROOT; | 513 | rs->rs_rdma_keys = RB_ROOT; |
514 | rs->rs_rx_traces = 0; | ||
487 | 515 | ||
488 | spin_lock_bh(&rds_sock_lock); | 516 | spin_lock_bh(&rds_sock_lock); |
489 | list_add_tail(&rs->rs_item, &rds_sock_list); | 517 | list_add_tail(&rs->rs_item, &rds_sock_list); |
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 4b0f12679219..e10624aa6959 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c | |||
@@ -911,8 +911,12 @@ static void rds_ib_process_recv(struct rds_connection *conn, | |||
911 | ic->i_ibinc = ibinc; | 911 | ic->i_ibinc = ibinc; |
912 | 912 | ||
913 | hdr = &ibinc->ii_inc.i_hdr; | 913 | hdr = &ibinc->ii_inc.i_hdr; |
914 | ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = | ||
915 | local_clock(); | ||
914 | memcpy(hdr, ihdr, sizeof(*hdr)); | 916 | memcpy(hdr, ihdr, sizeof(*hdr)); |
915 | ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); | 917 | ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); |
918 | ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] = | ||
919 | local_clock(); | ||
916 | 920 | ||
917 | rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, | 921 | rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, |
918 | ic->i_recv_data_rem, hdr->h_flags); | 922 | ic->i_recv_data_rem, hdr->h_flags); |
diff --git a/net/rds/rds.h b/net/rds/rds.h index f713194e4620..07fff73dd4f3 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h | |||
@@ -253,6 +253,11 @@ struct rds_ext_header_rdma_dest { | |||
253 | #define RDS_EXTHDR_GEN_NUM 6 | 253 | #define RDS_EXTHDR_GEN_NUM 6 |
254 | 254 | ||
255 | #define __RDS_EXTHDR_MAX 16 /* for now */ | 255 | #define __RDS_EXTHDR_MAX 16 /* for now */ |
256 | #define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1) | ||
257 | #define RDS_MSG_RX_HDR 0 | ||
258 | #define RDS_MSG_RX_START 1 | ||
259 | #define RDS_MSG_RX_END 2 | ||
260 | #define RDS_MSG_RX_CMSG 3 | ||
256 | 261 | ||
257 | struct rds_incoming { | 262 | struct rds_incoming { |
258 | atomic_t i_refcount; | 263 | atomic_t i_refcount; |
@@ -265,6 +270,7 @@ struct rds_incoming { | |||
265 | 270 | ||
266 | rds_rdma_cookie_t i_rdma_cookie; | 271 | rds_rdma_cookie_t i_rdma_cookie; |
267 | struct timeval i_rx_tstamp; | 272 | struct timeval i_rx_tstamp; |
273 | u64 i_rx_lat_trace[RDS_RX_MAX_TRACES]; | ||
268 | }; | 274 | }; |
269 | 275 | ||
270 | struct rds_mr { | 276 | struct rds_mr { |
@@ -575,6 +581,10 @@ struct rds_sock { | |||
575 | unsigned char rs_recverr, | 581 | unsigned char rs_recverr, |
576 | rs_cong_monitor; | 582 | rs_cong_monitor; |
577 | u32 rs_hash_initval; | 583 | u32 rs_hash_initval; |
584 | |||
585 | /* Socket receive path trace points*/ | ||
586 | u8 rs_rx_traces; | ||
587 | u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; | ||
578 | }; | 588 | }; |
579 | 589 | ||
580 | static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) | 590 | static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) |
diff --git a/net/rds/recv.c b/net/rds/recv.c index ba19eeeae85a..8b7e7b7f2c2d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c | |||
@@ -43,6 +43,8 @@ | |||
43 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | 43 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, |
44 | __be32 saddr) | 44 | __be32 saddr) |
45 | { | 45 | { |
46 | int i; | ||
47 | |||
46 | atomic_set(&inc->i_refcount, 1); | 48 | atomic_set(&inc->i_refcount, 1); |
47 | INIT_LIST_HEAD(&inc->i_item); | 49 | INIT_LIST_HEAD(&inc->i_item); |
48 | inc->i_conn = conn; | 50 | inc->i_conn = conn; |
@@ -50,6 +52,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | |||
50 | inc->i_rdma_cookie = 0; | 52 | inc->i_rdma_cookie = 0; |
51 | inc->i_rx_tstamp.tv_sec = 0; | 53 | inc->i_rx_tstamp.tv_sec = 0; |
52 | inc->i_rx_tstamp.tv_usec = 0; | 54 | inc->i_rx_tstamp.tv_usec = 0; |
55 | |||
56 | for (i = 0; i < RDS_RX_MAX_TRACES; i++) | ||
57 | inc->i_rx_lat_trace[i] = 0; | ||
53 | } | 58 | } |
54 | EXPORT_SYMBOL_GPL(rds_inc_init); | 59 | EXPORT_SYMBOL_GPL(rds_inc_init); |
55 | 60 | ||
@@ -373,6 +378,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | |||
373 | if (sock_flag(sk, SOCK_RCVTSTAMP)) | 378 | if (sock_flag(sk, SOCK_RCVTSTAMP)) |
374 | do_gettimeofday(&inc->i_rx_tstamp); | 379 | do_gettimeofday(&inc->i_rx_tstamp); |
375 | rds_inc_addref(inc); | 380 | rds_inc_addref(inc); |
381 | inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); | ||
376 | list_add_tail(&inc->i_item, &rs->rs_recv_queue); | 382 | list_add_tail(&inc->i_item, &rs->rs_recv_queue); |
377 | __rds_wake_sk_sleep(sk); | 383 | __rds_wake_sk_sleep(sk); |
378 | } else { | 384 | } else { |
@@ -534,7 +540,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, | |||
534 | ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, | 540 | ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, |
535 | sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); | 541 | sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); |
536 | if (ret) | 542 | if (ret) |
537 | return ret; | 543 | goto out; |
538 | } | 544 | } |
539 | 545 | ||
540 | if ((inc->i_rx_tstamp.tv_sec != 0) && | 546 | if ((inc->i_rx_tstamp.tv_sec != 0) && |
@@ -543,10 +549,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, | |||
543 | sizeof(struct timeval), | 549 | sizeof(struct timeval), |
544 | &inc->i_rx_tstamp); | 550 | &inc->i_rx_tstamp); |
545 | if (ret) | 551 | if (ret) |
546 | return ret; | 552 | goto out; |
547 | } | 553 | } |
548 | 554 | ||
549 | return 0; | 555 | if (rs->rs_rx_traces) { |
556 | struct rds_cmsg_rx_trace t; | ||
557 | int i, j; | ||
558 | |||
559 | inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); | ||
560 | t.rx_traces = rs->rs_rx_traces; | ||
561 | for (i = 0; i < rs->rs_rx_traces; i++) { | ||
562 | j = rs->rs_rx_trace[i]; | ||
563 | t.rx_trace_pos[i] = j; | ||
564 | t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] - | ||
565 | inc->i_rx_lat_trace[j]; | ||
566 | } | ||
567 | |||
568 | ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY, | ||
569 | sizeof(t), &t); | ||
570 | if (ret) | ||
571 | goto out; | ||
572 | } | ||
573 | |||
574 | out: | ||
575 | return ret; | ||
550 | } | 576 | } |
551 | 577 | ||
552 | int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, | 578 | int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, |
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index ad4892e97f91..e006ef8e6d40 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c | |||
@@ -180,6 +180,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, | |||
180 | rdsdebug("alloced tinc %p\n", tinc); | 180 | rdsdebug("alloced tinc %p\n", tinc); |
181 | rds_inc_path_init(&tinc->ti_inc, cp, | 181 | rds_inc_path_init(&tinc->ti_inc, cp, |
182 | cp->cp_conn->c_faddr); | 182 | cp->cp_conn->c_faddr); |
183 | tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = | ||
184 | local_clock(); | ||
185 | |||
183 | /* | 186 | /* |
184 | * XXX * we might be able to use the __ variants when | 187 | * XXX * we might be able to use the __ variants when |
185 | * we've already serialized at a higher level. | 188 | * we've already serialized at a higher level. |
@@ -204,6 +207,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, | |||
204 | /* could be 0 for a 0 len message */ | 207 | /* could be 0 for a 0 len message */ |
205 | tc->t_tinc_data_rem = | 208 | tc->t_tinc_data_rem = |
206 | be32_to_cpu(tinc->ti_inc.i_hdr.h_len); | 209 | be32_to_cpu(tinc->ti_inc.i_hdr.h_len); |
210 | tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] = | ||
211 | local_clock(); | ||
207 | } | 212 | } |
208 | } | 213 | } |
209 | 214 | ||