author		Dave Olson <dave.olson@qlogic.com>	2010-06-17 19:13:49 -0400
committer	Roland Dreier <rolandd@cisco.com>	2010-07-21 14:39:36 -0400
commit		bdf8edcb5732df554ef53096092e94d22ab5ffc9 (patch)
tree		de347dfe7dc0b21fb78a5eeb0d79e59c83ebcde9 /drivers/infiniband
parent		2d978a953b874bac418e0b9475edd1b9125281f6 (diff)
IB/qib: Allow PSM to select from multiple port assignment algorithms
We used to allow only full specification, or using all contexts within an
HCA before moving to the next HCA.  We now allow an additional method --
round-robining through HCAs -- and make that the default.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--	drivers/infiniband/hw/qib/qib_common.h		|  16
-rw-r--r--	drivers/infiniband/hw/qib/qib_file_ops.c	| 203
2 files changed, 118 insertions(+), 101 deletions(-)
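
The practical difference between the two policies is iteration order.  Below
is a minimal standalone sketch, not driver code: it assumes a hypothetical
machine with two HCAs of four user contexts each, and it models the driver's
"fewest contexts in use" scan as plain round-robin (NUM_HCAS, CTXTS_PER_HCA,
and that simplification are all illustrative assumptions):

#include <stdio.h>

/* Illustrative only: print the HCA each successive process lands on
 * under the two policies, for a hypothetical 2 HCAs x 4 contexts. */
enum { NUM_HCAS = 2, CTXTS_PER_HCA = 4 };

int main(void)
{
	int n;

	/* ACROSS (new default): spread jobs over HCAs round-robin */
	printf("ACROSS: ");
	for (n = 0; n < NUM_HCAS * CTXTS_PER_HCA; n++)
		printf("hca%d ", n % NUM_HCAS);
	printf("\n");

	/* WITHIN (old behavior): fill one HCA before moving to the next */
	printf("WITHIN: ");
	for (n = 0; n < NUM_HCAS * CTXTS_PER_HCA; n++)
		printf("hca%d ", n / CTXTS_PER_HCA);
	printf("\n");
	return 0;
}

On such a machine the first two processes land on different HCAs under
ACROSS, which is what lets a small job use both HCAs' bandwidth by default.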
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index b3955ed8f794..145da4040883 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@ struct qib_base_info {
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define QIB_USER_SWMINOR 10
+#define QIB_USER_SWMINOR 11
 
 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
 
@@ -302,6 +302,18 @@ struct qib_base_info {
 #define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
 
 /*
+ * If the unit is specified via open, HCA choice is fixed.  If port is
+ * specified, it's also fixed.  Otherwise we try to spread contexts
+ * across ports and HCAs, using different algorithms.  WITHIN is
+ * the old default, prior to this mechanism.
+ */
+#define QIB_PORT_ALG_ACROSS 0 /* round robin contexts across HCAs, then
+				 * ports; this is the default */
+#define QIB_PORT_ALG_WITHIN 1 /* use all contexts on an HCA (round robin
+				 * active ports within), then next HCA */
+#define QIB_PORT_ALG_COUNT 2 /* number of algorithm choices */
+
+/*
  * This structure is passed to qib_userinit() to tell the driver where
  * user code buffers are, sizes, etc.  The offsets and sizes of the
  * fields must remain unchanged, for binary compatibility.  It can
@@ -319,7 +331,7 @@ struct qib_user_info {
 	/* size of struct base_info to write to */
 	__u32 spu_base_info_size;
 
-	__u32 _spu_unused3;
+	__u32 spu_port_alg; /* which QIB_PORT_ALG_*; unused user minor < 11 */
 
 	/*
 	 * If two or more processes wish to share a context, each process
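
Given the header changes above, a userspace consumer built against SWMINOR 11
can ask for a policy when it prepares its context-assignment request.  A
hedged sketch follows: struct qib_user_info and the QIB_* macros come from the
header above, but how the struct is actually delivered to the driver (the
command write path) is deliberately left out, and real callers also fill in
the buffer addresses and sizes that this sketch skips.

#include <string.h>
#include "qib_common.h"		/* struct qib_user_info, QIB_PORT_ALG_* */

/*
 * Sketch: fill in the user-info block a PSM-style library would hand
 * to the driver at context-assignment time.  Delivery of the struct
 * to the device fd is elided, as are the base-info buffer fields.
 */
static void prep_user_info(struct qib_user_info *uinfo, unsigned port,
			   unsigned alg)
{
	memset(uinfo, 0, sizeof(*uinfo));
	/* a minor >= 11 is what tells the driver spu_port_alg is valid */
	uinfo->spu_userversion = (QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR;
	uinfo->spu_port = port;		/* 0 = let the driver choose a port */
	uinfo->spu_port_alg = alg;	/* QIB_PORT_ALG_ACROSS or _WITHIN */
}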
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index a142a9eb5226..6b11645edf35 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1294,128 +1294,130 @@ bail:
 	return ret;
 }
 
-static inline int usable(struct qib_pportdata *ppd, int active_only)
+static inline int usable(struct qib_pportdata *ppd)
 {
 	struct qib_devdata *dd = ppd->dd;
-	u32 linkok = active_only ? QIBL_LINKACTIVE :
-		(QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE);
 
 	return dd && (dd->flags & QIB_PRESENT) && dd->kregbase && ppd->lid &&
-		(ppd->lflags & linkok);
+		(ppd->lflags & QIBL_LINKACTIVE);
 }
 
-static int find_free_ctxt(int unit, struct file *fp,
-			  const struct qib_user_info *uinfo)
+/*
+ * Select a context on the given device, either using a requested port
+ * or the port based on the context number.
+ */
+static int choose_port_ctxt(struct file *fp, struct qib_devdata *dd, u32 port,
+			    const struct qib_user_info *uinfo)
 {
-	struct qib_devdata *dd = qib_lookup(unit);
 	struct qib_pportdata *ppd = NULL;
-	int ret;
-	u32 ctxt;
+	int ret, ctxt;
 
-	if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports)) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	/*
-	 * If users requests specific port, only try that one port, else
-	 * select "best" port below, based on context.
-	 */
-	if (uinfo->spu_port) {
-		ppd = dd->pport + uinfo->spu_port - 1;
-		if (!usable(ppd, 0)) {
-			ret = -ENETDOWN;
-			goto bail;
-		}
-	}
-
-	for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
-		if (dd->rcd[ctxt])
-			continue;
-		/*
-		 * The setting and clearing of user context rcd[x] protected
-		 * by the qib_mutex
-		 */
-		if (!ppd) {
-			/* choose port based on ctxt, if up, else 1st up */
-			ppd = dd->pport + (ctxt % dd->num_pports);
-			if (!usable(ppd, 0)) {
-				int i;
-				for (i = 0; i < dd->num_pports; i++) {
-					ppd = dd->pport + i;
-					if (usable(ppd, 0))
-						break;
-				}
-				if (i == dd->num_pports) {
-					ret = -ENETDOWN;
-					goto bail;
-				}
-			}
-		}
-		ret = setup_ctxt(ppd, ctxt, fp, uinfo);
-		goto bail;
+	if (port) {
+		if (!usable(dd->pport + port - 1)) {
+			ret = -ENETDOWN;
+			goto done;
+		} else
+			ppd = dd->pport + port - 1;
+	}
+	for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts && dd->rcd[ctxt];
+	     ctxt++)
+		;
+	if (ctxt == dd->cfgctxts) {
+		ret = -EBUSY;
+		goto done;
 	}
-	ret = -EBUSY;
+	if (!ppd) {
+		u32 pidx = ctxt % dd->num_pports;
+		if (usable(dd->pport + pidx))
+			ppd = dd->pport + pidx;
+		else {
+			for (pidx = 0; pidx < dd->num_pports && !ppd;
+			     pidx++)
+				if (usable(dd->pport + pidx))
+					ppd = dd->pport + pidx;
+		}
+	}
+	ret = ppd ? setup_ctxt(ppd, ctxt, fp, uinfo) : -ENETDOWN;
+done:
+	return ret;
+}
+
+static int find_free_ctxt(int unit, struct file *fp,
+			  const struct qib_user_info *uinfo)
+{
+	struct qib_devdata *dd = qib_lookup(unit);
+	int ret;
+
+	if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports))
+		ret = -ENODEV;
+	else
+		ret = choose_port_ctxt(fp, dd, uinfo->spu_port, uinfo);
 
-bail:
 	return ret;
 }
 
-static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
+		      unsigned alg)
 {
-	struct qib_pportdata *ppd;
-	int ret = 0, devmax;
-	int npresent, nup;
-	int ndev;
+	struct qib_devdata *udd = NULL;
+	int ret = 0, devmax, npresent, nup, ndev, dusable = 0, i;
 	u32 port = uinfo->spu_port, ctxt;
 
 	devmax = qib_count_units(&npresent, &nup);
+	if (!npresent) {
+		ret = -ENXIO;
+		goto done;
+	}
+	if (nup == 0) {
+		ret = -ENETDOWN;
+		goto done;
+	}
 
-	for (ndev = 0; ndev < devmax; ndev++) {
-		struct qib_devdata *dd = qib_lookup(ndev);
-
-		/* device portion of usable() */
-		if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
-			continue;
-		for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
-			if (dd->rcd[ctxt])
-				continue;
-			if (port) {
-				if (port > dd->num_pports)
-					continue;
-				ppd = dd->pport + port - 1;
-				if (!usable(ppd, 0))
-					continue;
-			} else {
-				/*
-				 * choose port based on ctxt, if up, else
-				 * first port that's up for multi-port HCA
-				 */
-				ppd = dd->pport + (ctxt % dd->num_pports);
-				if (!usable(ppd, 0)) {
-					int j;
-
-					ppd = NULL;
-					for (j = 0; j < dd->num_pports &&
-						     !ppd; j++)
-						if (usable(dd->pport + j, 0))
-							ppd = dd->pport + j;
-					if (!ppd)
-						continue; /* to next unit */
-				}
-			}
-			ret = setup_ctxt(ppd, ctxt, fp, uinfo);
+	if (alg == QIB_PORT_ALG_ACROSS) {
+		unsigned inuse = ~0U;
+		/* find device (with ACTIVE ports) with fewest ctxts in use */
+		for (ndev = 0; ndev < devmax; ndev++) {
+			struct qib_devdata *dd = qib_lookup(ndev);
+			unsigned cused = 0, cfree = 0;
+			if (!dd)
+				continue;
+			if (port && port <= dd->num_pports &&
+			    usable(dd->pport + port - 1))
+				dusable = 1;
+			else
+				for (i = 0; i < dd->num_pports; i++)
+					if (usable(dd->pport + i))
+						dusable++;
+			if (!dusable)
+				continue;
+			for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts;
+			     ctxt++)
+				if (dd->rcd[ctxt])
+					cused++;
+				else
+					cfree++;
+			if (cfree && cused < inuse) {
+				udd = dd;
+				inuse = cused;
+			}
+		}
+		if (udd) {
+			ret = choose_port_ctxt(fp, udd, port, uinfo);
 			goto done;
 		}
+	} else {
+		for (ndev = 0; ndev < devmax; ndev++) {
+			struct qib_devdata *dd = qib_lookup(ndev);
+			if (dd) {
+				ret = choose_port_ctxt(fp, dd, port, uinfo);
+				if (!ret)
+					goto done;
+				if (ret == -EBUSY)
+					dusable++;
+			}
+		}
 	}
-
-	if (npresent) {
-		if (nup == 0)
-			ret = -ENETDOWN;
-		else
-			ret = -EBUSY;
-	} else
-		ret = -ENXIO;
+	ret = dusable ? -EBUSY : -ENETDOWN;
 
 done:
 	return ret;
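
The core of the new ACROSS path in get_a_ctxt() above is the minimum search
over devices.  Below is a simplified standalone model of just that scan, with
struct dev_model standing in for the relevant qib_devdata bookkeeping (both
names are illustrative, not driver types):

/* Simplified model of the ACROSS device scan: among devices that still
 * have at least one free context, pick the one with the fewest in use. */
struct dev_model {
	int ctxts_used;
	int ctxts_free;
};

static int pick_device(const struct dev_model *devs, int ndevs)
{
	unsigned inuse = ~0U;
	int best = -1, i;

	for (i = 0; i < ndevs; i++) {
		if (devs[i].ctxts_free &&
		    (unsigned)devs[i].ctxts_used < inuse) {
			best = i;
			inuse = devs[i].ctxts_used;
		}
	}
	return best;	/* -1 if every device is full */
}

The free-context check mirrors the driver's cfree test: a device with the
fewest used contexts but none free is never picked, so a fully loaded HCA
simply drops out of the rotation.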
@@ -1481,7 +1483,7 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
 {
 	int ret;
 	int i_minor;
-	unsigned swmajor, swminor;
+	unsigned swmajor, swminor, alg = QIB_PORT_ALG_ACROSS;
 
 	/* Check to be sure we haven't already initialized this file */
 	if (ctxt_fp(fp)) {
@@ -1498,6 +1500,9 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
 
 	swminor = uinfo->spu_userversion & 0xffff;
 
+	if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
+		alg = uinfo->spu_port_alg;
+
 	mutex_lock(&qib_mutex);
 
 	if (qib_compatible_subctxts(swmajor, swminor) &&
@@ -1514,7 +1519,7 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
 	if (i_minor)
 		ret = find_free_ctxt(i_minor - 1, fp, uinfo);
 	else
-		ret = get_a_ctxt(fp, uinfo);
+		ret = get_a_ctxt(fp, uinfo, alg);
 
 done_chk_sdma:
 	if (!ret) {
@@ -1862,7 +1867,7 @@ static int disarm_req_delay(struct qib_ctxtdata *rcd)
 {
 	int ret = 0;
 
-	if (!usable(rcd->ppd, 1)) {
+	if (!usable(rcd->ppd)) {
 		int i;
 		/*
 		 * if link is down, or otherwise not usable, delay
@@ -1881,7 +1886,7 @@ static int disarm_req_delay(struct qib_ctxtdata *rcd)
 		set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
 			&rcd->user_event_mask[i]);
 	}
-	for (i = 0; !usable(rcd->ppd, 1) && i < 300; i++)
+	for (i = 0; !usable(rcd->ppd) && i < 300; i++)
 		msleep(100);
 	ret = -ENETDOWN;
 }
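
A closing note on the error codes these paths surface to userspace: after the
rework, -EBUSY consistently means usable devices exist but all their contexts
are taken, while -ENETDOWN means no ACTIVE port was found.  A hedged sketch of
how a consumer might tell them apart (open_qib_context() is a hypothetical
stand-in for the real open/assign sequence, not a driver or PSM API):

#include <errno.h>
#include <unistd.h>

/* Stub standing in for the real open/assign sequence; replace with
 * actual device setup.  Returns 0 or a negative errno. */
static int open_qib_context(unsigned alg)
{
	(void)alg;
	return -ENETDOWN;	/* pretend the link is still coming up */
}

/* Retry only the transient case: a down link may come ACTIVE, while
 * -EBUSY (no free contexts) will not fix itself quickly. */
static int try_open_with_retry(unsigned alg)
{
	int i, ret = -ENETDOWN;

	for (i = 0; i < 10; i++) {
		ret = open_qib_context(alg);
		if (ret != -ENETDOWN)
			break;
		sleep(1);	/* link may still be training */
	}
	return ret;
}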