aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2007-05-07 00:02:48 -0400
committerRoland Dreier <rolandd@cisco.com>2007-05-07 00:18:11 -0400
commited23a72778f3dbd465e55b06fe31629e7e1dd2f3 (patch)
tree99ab8b4cf7c51ae64b4d3d9108e82b31db2b3465 /include
parentf4fd0b224d60044d2da5ca02f8f2b5150c1d8731 (diff)
IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that completion events are only generated when a completions is added to a completion queue (CQ) after completion notification is requested. In other words, this means that the following race is possible: while (CQ is not empty) ib_poll_cq(CQ); // new completion is added after while loop is exited ib_req_notify_cq(CQ); // no event is generated for the existing completion To close this race, the IB spec recommends doing another poll of the CQ after requesting notification. However, it is not always possible to arrange code this way (for example, we have found that NAPI for IPoIB cannot poll after requesting notification). Also, some hardware (eg Mellanox HCAs) actually will generate an event for completions added before the call to ib_req_notify_cq() -- which is allowed by the spec, since there's no way for any upper-layer consumer to know exactly when a completion was really added -- so the extra poll of the CQ is just a waste. Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for ib_req_notify_cq() so that it can return a hint about whether the a completion may have been added before the request for notification. The return value of ib_req_notify_cq() is extended so: < 0 means an error occurred while requesting notification == 0 means notification was requested successfully, and if IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events were missed and it is safe to wait for another event. > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed in. It means that the consumer must poll the CQ again to make sure it is empty to avoid the race described above. We add a flag to enable this behavior rather than turning it on unconditionally, because checking for missed events may incur significant overhead for some low-level drivers, and consumers that don't care about the results of this test shouldn't be forced to pay for the test. Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'include')
-rw-r--r--include/rdma/ib_verbs.h40
1 files changed, 31 insertions, 9 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 17cc309d03ef..5342ac64ed1a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -431,9 +431,11 @@ struct ib_wc {
431 u8 port_num; /* valid only for DR SMPs on switches */ 431 u8 port_num; /* valid only for DR SMPs on switches */
432}; 432};
433 433
434enum ib_cq_notify { 434enum ib_cq_notify_flags {
435 IB_CQ_SOLICITED, 435 IB_CQ_SOLICITED = 1 << 0,
436 IB_CQ_NEXT_COMP 436 IB_CQ_NEXT_COMP = 1 << 1,
437 IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
438 IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
437}; 439};
438 440
439enum ib_srq_attr_mask { 441enum ib_srq_attr_mask {
@@ -990,7 +992,7 @@ struct ib_device {
990 struct ib_wc *wc); 992 struct ib_wc *wc);
991 int (*peek_cq)(struct ib_cq *cq, int wc_cnt); 993 int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
992 int (*req_notify_cq)(struct ib_cq *cq, 994 int (*req_notify_cq)(struct ib_cq *cq,
993 enum ib_cq_notify cq_notify); 995 enum ib_cq_notify_flags flags);
994 int (*req_ncomp_notif)(struct ib_cq *cq, 996 int (*req_ncomp_notif)(struct ib_cq *cq,
995 int wc_cnt); 997 int wc_cnt);
996 struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, 998 struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
@@ -1419,14 +1421,34 @@ int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
1419/** 1421/**
1420 * ib_req_notify_cq - Request completion notification on a CQ. 1422 * ib_req_notify_cq - Request completion notification on a CQ.
1421 * @cq: The CQ to generate an event for. 1423 * @cq: The CQ to generate an event for.
1422 * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will 1424 * @flags:
1423 * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP, 1425 * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
1424 * notification will occur on the next completion. 1426 * to request an event on the next solicited event or next work
1427 * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
1428 * may also be |ed in to request a hint about missed events, as
1429 * described below.
1430 *
1431 * Return Value:
1432 * < 0 means an error occurred while requesting notification
1433 * == 0 means notification was requested successfully, and if
1434 * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
1435 * were missed and it is safe to wait for another event. In
1436 * this case is it guaranteed that any work completions added
1437 * to the CQ since the last CQ poll will trigger a completion
1438 * notification event.
1439 * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
1440 * in. It means that the consumer must poll the CQ again to
1441 * make sure it is empty to avoid missing an event because of a
1442 * race between requesting notification and an entry being
1443 * added to the CQ. This return value means it is possible
1444 * (but not guaranteed) that a work completion has been added
1445 * to the CQ since the last poll without triggering a
1446 * completion notification event.
1425 */ 1447 */
1426static inline int ib_req_notify_cq(struct ib_cq *cq, 1448static inline int ib_req_notify_cq(struct ib_cq *cq,
1427 enum ib_cq_notify cq_notify) 1449 enum ib_cq_notify_flags flags)
1428{ 1450{
1429 return cq->device->req_notify_cq(cq, cq_notify); 1451 return cq->device->req_notify_cq(cq, flags);
1430} 1452}
1431 1453
1432/** 1454/**