aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLeon Romanovsky <leonro@mellanox.com>2018-01-28 04:17:25 -0500
committerJason Gunthorpe <jgg@mellanox.com>2018-01-29 22:21:41 -0500
commitb5fa635aab8f0d39a824c01991266a6d06f007fb (patch)
tree0ddaf7eafe6711a4f37bffb2e5c7b8a191c45e3a /drivers
parentbf3c5a93c52368410a521af34ed3bff91a99df44 (diff)
RDMA/nldev: Provide detailed QP information
Implement the RDMA nldev netlink interface to get detailed information on each QP in the system. This includes the owning process or kernel ULP and detailed information from the qp_attrs. Currently only the dumpit variant is implemented.

Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/core/nldev.c227
1 files changed, 227 insertions, 0 deletions
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index c37bb041f647..fa8655e3b3ed 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -59,6 +59,18 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
59 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING, 59 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
60 .len = 16 }, 60 .len = 16 },
61 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 }, 61 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
62 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
63 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
64 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
65 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
66 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
67 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
68 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
69 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
70 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
71 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
72 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
73 .len = TASK_COMM_LEN },
62}; 74};
63 75
64static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) 76static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -200,6 +212,78 @@ err:
200 return ret; 212 return ret;
201} 213}
202 214
215static int fill_res_qp_entry(struct sk_buff *msg,
216 struct ib_qp *qp, uint32_t port)
217{
218 struct rdma_restrack_entry *res = &qp->res;
219 struct ib_qp_init_attr qp_init_attr;
220 struct nlattr *entry_attr;
221 struct ib_qp_attr qp_attr;
222 int ret;
223
224 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
225 if (ret)
226 return ret;
227
228 if (port && port != qp_attr.port_num)
229 return 0;
230
231 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
232 if (!entry_attr)
233 goto out;
234
235 /* In create_qp() port is not set yet */
236 if (qp_attr.port_num &&
237 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
238 goto err;
239
240 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
241 goto err;
242 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
243 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
244 qp_attr.dest_qp_num))
245 goto err;
246 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
247 qp_attr.rq_psn))
248 goto err;
249 }
250
251 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
252 goto err;
253
254 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
255 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
256 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
257 qp_attr.path_mig_state))
258 goto err;
259 }
260 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
261 goto err;
262 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
263 goto err;
264
265 /*
266 * Existence of task means that it is user QP and netlink
267 * user is invited to go and read /proc/PID/comm to get name
268 * of the task file and res->task_com should be NULL.
269 */
270 if (rdma_is_kernel_res(res)) {
271 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
272 goto err;
273 } else {
274 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
275 goto err;
276 }
277
278 nla_nest_end(msg, entry_attr);
279 return 0;
280
281err:
282 nla_nest_cancel(msg, entry_attr);
283out:
284 return -EMSGSIZE;
285}
286
203static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 287static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
204 struct netlink_ext_ack *extack) 288 struct netlink_ext_ack *extack)
205{ 289{
@@ -472,6 +556,136 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
472 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb); 556 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
473} 557}
474 558
559static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
560 struct netlink_callback *cb)
561{
562 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
563 struct rdma_restrack_entry *res;
564 int err, ret = 0, idx = 0;
565 struct nlattr *table_attr;
566 struct ib_device *device;
567 int start = cb->args[0];
568 struct ib_qp *qp = NULL;
569 struct nlmsghdr *nlh;
570 u32 index, port = 0;
571
572 err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
573 nldev_policy, NULL);
574 /*
575 * Right now, we are expecting the device index to get QP information,
576 * but it is possible to extend this code to return all devices in
577 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
578 * if it doesn't exist, we will iterate over all devices.
579 *
580 * But it is not needed for now.
581 */
582 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
583 return -EINVAL;
584
585 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
586 device = ib_device_get_by_index(index);
587 if (!device)
588 return -EINVAL;
589
590 /*
591 * If no PORT_INDEX is supplied, we will return all QPs from that device
592 */
593 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
594 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
595 if (!rdma_is_port_valid(device, port)) {
596 ret = -EINVAL;
597 goto err_index;
598 }
599 }
600
601 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
602 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
603 0, NLM_F_MULTI);
604
605 if (fill_nldev_handle(skb, device)) {
606 ret = -EMSGSIZE;
607 goto err;
608 }
609
610 table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
611 if (!table_attr) {
612 ret = -EMSGSIZE;
613 goto err;
614 }
615
616 down_read(&device->res.rwsem);
617 hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
618 if (idx < start)
619 goto next;
620
621 if ((rdma_is_kernel_res(res) &&
622 task_active_pid_ns(current) != &init_pid_ns) ||
623 (!rdma_is_kernel_res(res) &&
624 task_active_pid_ns(current) != task_active_pid_ns(res->task)))
625 /*
626 * 1. Kernel QPs should be visible in init namspace only
627 * 2. Present only QPs visible in the current namespace
628 */
629 goto next;
630
631 if (!rdma_restrack_get(res))
632 /*
633 * Resource is under release now, but we are not
634 * relesing lock now, so it will be released in
635 * our next pass, once we will get ->next pointer.
636 */
637 goto next;
638
639 qp = container_of(res, struct ib_qp, res);
640
641 up_read(&device->res.rwsem);
642 ret = fill_res_qp_entry(skb, qp, port);
643 down_read(&device->res.rwsem);
644 /*
645 * Return resource back, but it won't be released till
646 * the &device->res.rwsem will be released for write.
647 */
648 rdma_restrack_put(res);
649
650 if (ret == -EMSGSIZE)
651 /*
652 * There is a chance to optimize here.
653 * It can be done by using list_prepare_entry
654 * and list_for_each_entry_continue afterwards.
655 */
656 break;
657 if (ret)
658 goto res_err;
659next: idx++;
660 }
661 up_read(&device->res.rwsem);
662
663 nla_nest_end(skb, table_attr);
664 nlmsg_end(skb, nlh);
665 cb->args[0] = idx;
666
667 /*
668 * No more QPs to fill, cancel the message and
669 * return 0 to mark end of dumpit.
670 */
671 if (!qp)
672 goto err;
673
674 put_device(&device->dev);
675 return skb->len;
676
677res_err:
678 nla_nest_cancel(skb, table_attr);
679 up_read(&device->res.rwsem);
680
681err:
682 nlmsg_cancel(skb, nlh);
683
684err_index:
685 put_device(&device->dev);
686 return ret;
687}
688
475static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 689static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
476 [RDMA_NLDEV_CMD_GET] = { 690 [RDMA_NLDEV_CMD_GET] = {
477 .doit = nldev_get_doit, 691 .doit = nldev_get_doit,
@@ -485,6 +699,19 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
485 .doit = nldev_res_get_doit, 699 .doit = nldev_res_get_doit,
486 .dump = nldev_res_get_dumpit, 700 .dump = nldev_res_get_dumpit,
487 }, 701 },
702 [RDMA_NLDEV_CMD_RES_QP_GET] = {
703 .dump = nldev_res_get_qp_dumpit,
704 /*
705 * .doit is not implemented yet for two reasons:
706 * 1. It is not needed yet.
707 * 2. There is a need to provide identifier, while it is easy
708 * for the QPs (device index + port index + LQPN), it is not
709 * the case for the rest of resources (PD and CQ). Because it
710 * is better to provide similar interface for all resources,
711 * let's wait till we will have other resources implemented
712 * too.
713 */
714 },
488}; 715};
489 716
490void __init nldev_init(void) 717void __init nldev_init(void)