aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Mike Marciniszyn <mike.marciniszyn@intel.com> 2012-07-19 09:04:04 -0400
committer Roland Dreier <roland@purestorage.com> 2012-07-19 14:20:04 -0400
commit 36a8f01cd24b125aa027c71c1288588edde5322d (patch)
tree e8f3e5dd99bac1a750b7113c00d973cec12cd23c /drivers/infiniband
parent 551ace124d0ef471e8a5fee3ef9e5bb7460251be (diff)
IB/qib: Add congestion control agent implementation
Add a congestion control agent in the driver that handles gets and sets from the congestion control manager in the fabric for the Performance Scale Messaging (PSM) library.

Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/hw/qib/qib.h 35
-rw-r--r-- drivers/infiniband/hw/qib/qib_init.c 96
-rw-r--r-- drivers/infiniband/hw/qib/qib_mad.c 315
-rw-r--r-- drivers/infiniband/hw/qib/qib_mad.h 198
-rw-r--r-- drivers/infiniband/hw/qib/qib_sysfs.c 158
5 files changed, 790 insertions, 12 deletions
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index cbe577151457..6e19ec844d99 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -519,6 +519,7 @@ struct qib_pportdata {
519 struct qib_devdata *dd; 519 struct qib_devdata *dd;
520 struct qib_chippport_specific *cpspec; /* chip-specific per-port */ 520 struct qib_chippport_specific *cpspec; /* chip-specific per-port */
521 struct kobject pport_kobj; 521 struct kobject pport_kobj;
522 struct kobject pport_cc_kobj;
522 struct kobject sl2vl_kobj; 523 struct kobject sl2vl_kobj;
523 struct kobject diagc_kobj; 524 struct kobject diagc_kobj;
524 525
@@ -638,6 +639,39 @@ struct qib_pportdata {
638 struct timer_list led_override_timer; 639 struct timer_list led_override_timer;
639 struct xmit_wait cong_stats; 640 struct xmit_wait cong_stats;
640 struct timer_list symerr_clear_timer; 641 struct timer_list symerr_clear_timer;
642
643 /* Synchronize access between driver writes and sysfs reads */
644 spinlock_t cc_shadow_lock
645 ____cacheline_aligned_in_smp;
646
647 /* Shadow copy of the congestion control table */
648 struct cc_table_shadow *ccti_entries_shadow;
649
650 /* Shadow copy of the congestion control entries */
651 struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
652
653 /* List of congestion control table entries */
654 struct ib_cc_table_entry_shadow *ccti_entries;
655
656 /* 16 congestion entries with each entry corresponding to a SL */
657 struct ib_cc_congestion_entry_shadow *congestion_entries;
658
659 /* Total number of congestion control table entries */
660 u16 total_cct_entry;
661
662 /* Bit map identifying service level */
663 u16 cc_sl_control_map;
664
665 /* maximum congestion control table index */
666 u16 ccti_limit;
667
668 /* CA's max number of 64 entry units in the congestion control table */
669 u8 cc_max_table_entries;
670
671 /* Maximum number of congestion control entries that the agent expects
672 * the manager to send.
673 */
674 u8 cc_supported_table_entries;
641}; 675};
642 676
643/* Observers. Not to be taken lightly, possibly not to ship. */ 677/* Observers. Not to be taken lightly, possibly not to ship. */
@@ -1078,6 +1112,7 @@ extern u32 qib_cpulist_count;
1078extern unsigned long *qib_cpulist; 1112extern unsigned long *qib_cpulist;
1079 1113
1080extern unsigned qib_wc_pat; 1114extern unsigned qib_wc_pat;
1115extern unsigned qib_cc_table_size;
1081int qib_init(struct qib_devdata *, int); 1116int qib_init(struct qib_devdata *, int);
1082int init_chip_wc_pat(struct qib_devdata *dd, u32); 1117int init_chip_wc_pat(struct qib_devdata *dd, u32);
1083int qib_enable_wc(struct qib_devdata *dd); 1118int qib_enable_wc(struct qib_devdata *dd);
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 306e65e99e99..24ad901c95c4 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -41,6 +41,7 @@
41 41
42#include "qib.h" 42#include "qib.h"
43#include "qib_common.h" 43#include "qib_common.h"
44#include "qib_mad.h"
44 45
45/* 46/*
46 * min buffers we want to have per context, after driver 47 * min buffers we want to have per context, after driver
@@ -71,6 +72,9 @@ unsigned qib_n_krcv_queues;
71module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); 72module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
72MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); 73MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
73 74
75unsigned qib_cc_table_size;
76module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
77MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
74/* 78/*
75 * qib_wc_pat parameter: 79 * qib_wc_pat parameter:
76 * 0 is WC via MTRR 80 * 0 is WC via MTRR
@@ -199,6 +203,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
199void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, 203void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
200 u8 hw_pidx, u8 port) 204 u8 hw_pidx, u8 port)
201{ 205{
206 int size;
202 ppd->dd = dd; 207 ppd->dd = dd;
203 ppd->hw_pidx = hw_pidx; 208 ppd->hw_pidx = hw_pidx;
204 ppd->port = port; /* IB port number, not index */ 209 ppd->port = port; /* IB port number, not index */
@@ -212,6 +217,81 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
212 ppd->symerr_clear_timer.data = (unsigned long)ppd; 217 ppd->symerr_clear_timer.data = (unsigned long)ppd;
213 218
214 ppd->qib_wq = NULL; 219 ppd->qib_wq = NULL;
220
221 spin_lock_init(&ppd->cc_shadow_lock);
222
223 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
224 goto bail;
225
226 ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
227 IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
228
229 ppd->cc_max_table_entries =
230 ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
231
232 size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
233 * IB_CCT_ENTRIES;
234 ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
235 if (!ppd->ccti_entries) {
236 qib_dev_err(dd,
237 "failed to allocate congestion control table for port %d!\n",
238 port);
239 goto bail;
240 }
241
242 size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
243 ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
244 if (!ppd->congestion_entries) {
245 qib_dev_err(dd,
246 "failed to allocate congestion setting list for port %d!\n",
247 port);
248 goto bail_1;
249 }
250
251 size = sizeof(struct cc_table_shadow);
252 ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
253 if (!ppd->ccti_entries_shadow) {
254 qib_dev_err(dd,
255 "failed to allocate shadow ccti list for port %d!\n",
256 port);
257 goto bail_2;
258 }
259
260 size = sizeof(struct ib_cc_congestion_setting_attr);
261 ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
262 if (!ppd->congestion_entries_shadow) {
263 qib_dev_err(dd,
264 "failed to allocate shadow congestion setting list for port %d!\n",
265 port);
266 goto bail_3;
267 }
268
269 return;
270
271bail_3:
272 kfree(ppd->ccti_entries_shadow);
273 ppd->ccti_entries_shadow = NULL;
274bail_2:
275 kfree(ppd->congestion_entries);
276 ppd->congestion_entries = NULL;
277bail_1:
278 kfree(ppd->ccti_entries);
279 ppd->ccti_entries = NULL;
280bail:
281 /* User is intentionally disabling the congestion control agent */
282 if (!qib_cc_table_size)
283 return;
284
285 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
286 qib_cc_table_size = 0;
287 qib_dev_err(dd,
288 "Congestion Control table size %d less than minimum %d for port %d\n",
289 qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
290 }
291
292 qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
293 port);
294 return;
215} 295}
216 296
217static int init_pioavailregs(struct qib_devdata *dd) 297static int init_pioavailregs(struct qib_devdata *dd)
@@ -1164,10 +1244,24 @@ static void cleanup_device_data(struct qib_devdata *dd)
1164 unsigned long flags; 1244 unsigned long flags;
1165 1245
1166 /* users can't do anything more with chip */ 1246 /* users can't do anything more with chip */
1167 for (pidx = 0; pidx < dd->num_pports; ++pidx) 1247 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
1168 if (dd->pport[pidx].statusp) 1248 if (dd->pport[pidx].statusp)
1169 *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; 1249 *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
1170 1250
1251 spin_lock(&dd->pport[pidx].cc_shadow_lock);
1252
1253 kfree(dd->pport[pidx].congestion_entries);
1254 dd->pport[pidx].congestion_entries = NULL;
1255 kfree(dd->pport[pidx].ccti_entries);
1256 dd->pport[pidx].ccti_entries = NULL;
1257 kfree(dd->pport[pidx].ccti_entries_shadow);
1258 dd->pport[pidx].ccti_entries_shadow = NULL;
1259 kfree(dd->pport[pidx].congestion_entries_shadow);
1260 dd->pport[pidx].congestion_entries_shadow = NULL;
1261
1262 spin_unlock(&dd->pport[pidx].cc_shadow_lock);
1263 }
1264
1171 if (!qib_wc_pat) 1265 if (!qib_wc_pat)
1172 qib_disable_wc(dd); 1266 qib_disable_wc(dd);
1173 1267
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 6e20b58b90b6..19f1e6c45fb6 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp)
49 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; 49 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
50} 50}
51 51
52static int reply_failure(struct ib_smp *smp)
53{
54 /*
55 * The verbs framework will handle the directed/LID route
56 * packet changes.
57 */
58 smp->method = IB_MGMT_METHOD_GET_RESP;
59 if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
60 smp->status |= IB_SMP_DIRECTION;
61 return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
62}
63
52static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) 64static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
53{ 65{
54 struct ib_mad_send_buf *send_buf; 66 struct ib_mad_send_buf *send_buf;
@@ -2047,6 +2059,298 @@ bail:
2047 return ret; 2059 return ret;
2048} 2060}
2049 2061
2062static int cc_get_classportinfo(struct ib_cc_mad *ccp,
2063 struct ib_device *ibdev)
2064{
2065 struct ib_cc_classportinfo_attr *p =
2066 (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
2067
2068 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2069
2070 p->base_version = 1;
2071 p->class_version = 1;
2072 p->cap_mask = 0;
2073
2074 /*
2075 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2076 */
2077 p->resp_time_value = 18;
2078
2079 return reply((struct ib_smp *) ccp);
2080}
2081
2082static int cc_get_congestion_info(struct ib_cc_mad *ccp,
2083 struct ib_device *ibdev, u8 port)
2084{
2085 struct ib_cc_info_attr *p =
2086 (struct ib_cc_info_attr *)ccp->mgmt_data;
2087 struct qib_ibport *ibp = to_iport(ibdev, port);
2088 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2089
2090 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2091
2092 p->congestion_info = 0;
2093 p->control_table_cap = ppd->cc_max_table_entries;
2094
2095 return reply((struct ib_smp *) ccp);
2096}
2097
2098static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
2099 struct ib_device *ibdev, u8 port)
2100{
2101 int i;
2102 struct ib_cc_congestion_setting_attr *p =
2103 (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
2104 struct qib_ibport *ibp = to_iport(ibdev, port);
2105 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2106 struct ib_cc_congestion_entry_shadow *entries;
2107
2108 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2109
2110 spin_lock(&ppd->cc_shadow_lock);
2111
2112 entries = ppd->congestion_entries_shadow->entries;
2113 p->port_control = cpu_to_be16(
2114 ppd->congestion_entries_shadow->port_control);
2115 p->control_map = cpu_to_be16(
2116 ppd->congestion_entries_shadow->control_map);
2117 for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
2118 p->entries[i].ccti_increase = entries[i].ccti_increase;
2119 p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
2120 p->entries[i].trigger_threshold = entries[i].trigger_threshold;
2121 p->entries[i].ccti_min = entries[i].ccti_min;
2122 }
2123
2124 spin_unlock(&ppd->cc_shadow_lock);
2125
2126 return reply((struct ib_smp *) ccp);
2127}
2128
2129static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
2130 struct ib_device *ibdev, u8 port)
2131{
2132 struct ib_cc_table_attr *p =
2133 (struct ib_cc_table_attr *)ccp->mgmt_data;
2134 struct qib_ibport *ibp = to_iport(ibdev, port);
2135 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2136 u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
2137 u32 max_cct_block;
2138 u32 cct_entry;
2139 struct ib_cc_table_entry_shadow *entries;
2140 int i;
2141
2142 /* Is the table index more than what is supported? */
2143 if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
2144 goto bail;
2145
2146 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2147
2148 spin_lock(&ppd->cc_shadow_lock);
2149
2150 max_cct_block =
2151 (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
2152 max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
2153
2154 if (cct_block_index > max_cct_block) {
2155 spin_unlock(&ppd->cc_shadow_lock);
2156 goto bail;
2157 }
2158
2159 ccp->attr_mod = cpu_to_be32(cct_block_index);
2160
2161 cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
2162
2163 cct_entry--;
2164
2165 p->ccti_limit = cpu_to_be16(cct_entry);
2166
2167 entries = &ppd->ccti_entries_shadow->
2168 entries[IB_CCT_ENTRIES * cct_block_index];
2169 cct_entry %= IB_CCT_ENTRIES;
2170
2171 for (i = 0; i <= cct_entry; i++)
2172 p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
2173
2174 spin_unlock(&ppd->cc_shadow_lock);
2175
2176 return reply((struct ib_smp *) ccp);
2177
2178bail:
2179 return reply_failure((struct ib_smp *) ccp);
2180}
2181
2182static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
2183 struct ib_device *ibdev, u8 port)
2184{
2185 struct ib_cc_congestion_setting_attr *p =
2186 (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
2187 struct qib_ibport *ibp = to_iport(ibdev, port);
2188 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2189 int i;
2190
2191 ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
2192
2193 for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
2194 ppd->congestion_entries[i].ccti_increase =
2195 p->entries[i].ccti_increase;
2196
2197 ppd->congestion_entries[i].ccti_timer =
2198 be16_to_cpu(p->entries[i].ccti_timer);
2199
2200 ppd->congestion_entries[i].trigger_threshold =
2201 p->entries[i].trigger_threshold;
2202
2203 ppd->congestion_entries[i].ccti_min =
2204 p->entries[i].ccti_min;
2205 }
2206
2207 return reply((struct ib_smp *) ccp);
2208}
2209
2210static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
2211 struct ib_device *ibdev, u8 port)
2212{
2213 struct ib_cc_table_attr *p =
2214 (struct ib_cc_table_attr *)ccp->mgmt_data;
2215 struct qib_ibport *ibp = to_iport(ibdev, port);
2216 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2217 u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
2218 u32 cct_entry;
2219 struct ib_cc_table_entry_shadow *entries;
2220 int i;
2221
2222 /* Is the table index more than what is supported? */
2223 if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
2224 goto bail;
2225
2226 /* If this packet is the first in the sequence then
2227 * zero the total table entry count.
2228 */
2229 if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
2230 ppd->total_cct_entry = 0;
2231
2232 cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
2233
2234 /* ccti_limit is 0 to 63 */
2235 ppd->total_cct_entry += (cct_entry + 1);
2236
2237 if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
2238 goto bail;
2239
2240 ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
2241
2242 entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
2243
2244 for (i = 0; i <= cct_entry; i++)
2245 entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
2246
2247 spin_lock(&ppd->cc_shadow_lock);
2248
2249 ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
2250 memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
2251 (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
2252
2253 ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
2254 ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
2255 memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
2256 IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
2257
2258 spin_unlock(&ppd->cc_shadow_lock);
2259
2260 return reply((struct ib_smp *) ccp);
2261
2262bail:
2263 return reply_failure((struct ib_smp *) ccp);
2264}
2265
/*
 * Congestion control key check.
 *
 * Currently a stub: no argument is examined and 0 is always returned,
 * which process_cc() treats as "continue processing".
 */
static int check_cc_key(struct qib_ibport *ibp,
			struct ib_cc_mad *ccp, int mad_flags)
{
	return 0;
}
2271
2272static int process_cc(struct ib_device *ibdev, int mad_flags,
2273 u8 port, struct ib_mad *in_mad,
2274 struct ib_mad *out_mad)
2275{
2276 struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
2277 struct qib_ibport *ibp = to_iport(ibdev, port);
2278 int ret;
2279
2280 *out_mad = *in_mad;
2281
2282 if (ccp->class_version != 2) {
2283 ccp->status |= IB_SMP_UNSUP_VERSION;
2284 ret = reply((struct ib_smp *)ccp);
2285 goto bail;
2286 }
2287
2288 ret = check_cc_key(ibp, ccp, mad_flags);
2289 if (ret)
2290 goto bail;
2291
2292 switch (ccp->method) {
2293 case IB_MGMT_METHOD_GET:
2294 switch (ccp->attr_id) {
2295 case IB_CC_ATTR_CLASSPORTINFO:
2296 ret = cc_get_classportinfo(ccp, ibdev);
2297 goto bail;
2298
2299 case IB_CC_ATTR_CONGESTION_INFO:
2300 ret = cc_get_congestion_info(ccp, ibdev, port);
2301 goto bail;
2302
2303 case IB_CC_ATTR_CA_CONGESTION_SETTING:
2304 ret = cc_get_congestion_setting(ccp, ibdev, port);
2305 goto bail;
2306
2307 case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
2308 ret = cc_get_congestion_control_table(ccp, ibdev, port);
2309 goto bail;
2310
2311 /* FALLTHROUGH */
2312 default:
2313 ccp->status |= IB_SMP_UNSUP_METH_ATTR;
2314 ret = reply((struct ib_smp *) ccp);
2315 goto bail;
2316 }
2317
2318 case IB_MGMT_METHOD_SET:
2319 switch (ccp->attr_id) {
2320 case IB_CC_ATTR_CA_CONGESTION_SETTING:
2321 ret = cc_set_congestion_setting(ccp, ibdev, port);
2322 goto bail;
2323
2324 case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
2325 ret = cc_set_congestion_control_table(ccp, ibdev, port);
2326 goto bail;
2327
2328 /* FALLTHROUGH */
2329 default:
2330 ccp->status |= IB_SMP_UNSUP_METH_ATTR;
2331 ret = reply((struct ib_smp *) ccp);
2332 goto bail;
2333 }
2334
2335 case IB_MGMT_METHOD_GET_RESP:
2336 /*
2337 * The ib_mad module will call us to process responses
2338 * before checking for other consumers.
2339 * Just tell the caller to process it normally.
2340 */
2341 ret = IB_MAD_RESULT_SUCCESS;
2342 goto bail;
2343
2344 case IB_MGMT_METHOD_TRAP:
2345 default:
2346 ccp->status |= IB_SMP_UNSUP_METHOD;
2347 ret = reply((struct ib_smp *) ccp);
2348 }
2349
2350bail:
2351 return ret;
2352}
2353
2050/** 2354/**
2051 * qib_process_mad - process an incoming MAD packet 2355 * qib_process_mad - process an incoming MAD packet
2052 * @ibdev: the infiniband device this packet came in on 2356 * @ibdev: the infiniband device this packet came in on
@@ -2071,6 +2375,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
2071 struct ib_mad *in_mad, struct ib_mad *out_mad) 2375 struct ib_mad *in_mad, struct ib_mad *out_mad)
2072{ 2376{
2073 int ret; 2377 int ret;
2378 struct qib_ibport *ibp = to_iport(ibdev, port);
2379 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2074 2380
2075 switch (in_mad->mad_hdr.mgmt_class) { 2381 switch (in_mad->mad_hdr.mgmt_class) {
2076 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: 2382 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@@ -2082,6 +2388,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
2082 ret = process_perf(ibdev, port, in_mad, out_mad); 2388 ret = process_perf(ibdev, port, in_mad, out_mad);
2083 goto bail; 2389 goto bail;
2084 2390
2391 case IB_MGMT_CLASS_CONG_MGMT:
2392 if (!ppd->congestion_entries_shadow ||
2393 !qib_cc_table_size) {
2394 ret = IB_MAD_RESULT_SUCCESS;
2395 goto bail;
2396 }
2397 ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
2398 goto bail;
2399
2085 default: 2400 default:
2086 ret = IB_MAD_RESULT_SUCCESS; 2401 ret = IB_MAD_RESULT_SUCCESS;
2087 } 2402 }
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index ecc416cdbaaa..57bd3fa016bc 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. 2 * Copyright (c) 2012 Intel Corporation. All rights reserved.
3 * All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -31,6 +31,8 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 */ 33 */
34#ifndef _QIB_MAD_H
35#define _QIB_MAD_H
34 36
35#include <rdma/ib_pma.h> 37#include <rdma/ib_pma.h>
36 38
@@ -223,6 +225,198 @@ struct ib_pma_portcounters_cong {
223#define IB_PMA_SEL_CONG_ROUTING 0x08 225#define IB_PMA_SEL_CONG_ROUTING 0x08
224 226
225/* 227/*
228 * Congestion control class attributes
229 */
230#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
231#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
232#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
233#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
234#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
235#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
236#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
237#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
238#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
239#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
240
241/* generalizations for threshold values */
242#define IB_CC_THRESHOLD_NONE 0x0
243#define IB_CC_THRESHOLD_MIN 0x1
244#define IB_CC_THRESHOLD_MAX 0xf
245
246/* CCA MAD header constants */
247#define IB_CC_MAD_LOGDATA_LEN 32
248#define IB_CC_MAD_MGMTDATA_LEN 192
249
250struct ib_cc_mad {
251 u8 base_version;
252 u8 mgmt_class;
253 u8 class_version;
254 u8 method;
255 __be16 status;
256 __be16 class_specific;
257 __be64 tid;
258 __be16 attr_id;
259 __be16 resv;
260 __be32 attr_mod;
261 __be64 cckey;
262
263 /* For CongestionLog attribute only */
264 u8 log_data[IB_CC_MAD_LOGDATA_LEN];
265
266 u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
267} __packed;
268
269/*
270 * Congestion Control class portinfo capability mask bits
271 */
272#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
273#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
274#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
275#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
276
277struct ib_cc_classportinfo_attr {
278 u8 base_version;
279 u8 class_version;
280 __be16 cap_mask;
281 u8 reserved[3];
282 u8 resp_time_value; /* only lower 5 bits */
283 union ib_gid redirect_gid;
284 __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
285 __be16 redirect_lid;
286 __be16 redirect_pkey;
287 __be32 redirect_qp; /* only lower 24 bits */
288 __be32 redirect_qkey;
289 union ib_gid trap_gid;
290 __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
291 __be16 trap_lid;
292 __be16 trap_pkey;
293 __be32 trap_hl_qp; /* 8, 24 bits respectively */
294 __be32 trap_qkey;
295} __packed;
296
297/* Congestion control traps */
298#define IB_CC_TRAP_KEY_VIOLATION 0x0000
299
300struct ib_cc_trap_key_violation_attr {
301 __be16 source_lid;
302 u8 method;
303 u8 reserved1;
304 __be16 attrib_id;
305 __be32 attrib_mod;
306 __be32 qp;
307 __be64 cckey;
308 u8 sgid[16];
309 u8 padding[24];
310} __packed;
311
312/* Congestion info flags */
313#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
314#define IB_CC_TABLE_CAP_DEFAULT 31
315
316struct ib_cc_info_attr {
317 __be16 congestion_info;
318 u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
319} __packed;
320
321struct ib_cc_key_info_attr {
322 __be64 cckey;
323 u8 protect;
324 __be16 lease_period;
325 __be16 violations;
326} __packed;
327
328#define IB_CC_CL_CA_LOGEVENTS_LEN 208
329
330struct ib_cc_log_attr {
331 u8 log_type;
332 u8 congestion_flags;
333 __be16 threshold_event_counter;
334 __be16 threshold_congestion_event_map;
335 __be16 current_time_stamp;
336 u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
337} __packed;
338
339#define IB_CC_CLEC_SERVICETYPE_RC 0x0
340#define IB_CC_CLEC_SERVICETYPE_UC 0x1
341#define IB_CC_CLEC_SERVICETYPE_RD 0x2
342#define IB_CC_CLEC_SERVICETYPE_UD 0x3
343
344struct ib_cc_log_event {
345 u8 local_qp_cn_entry;
346 u8 remote_qp_number_cn_entry[3];
347 u8 sl_cn_entry:4;
348 u8 service_type_cn_entry:4;
349 __be32 remote_lid_cn_entry;
350 __be32 timestamp_cn_entry;
351} __packed;
352
353/* Sixteen congestion entries */
354#define IB_CC_CCS_ENTRIES 16
355
356/* Port control flags */
357#define IB_CC_CCS_PC_SL_BASED 0x01
358
359struct ib_cc_congestion_entry {
360 u8 ccti_increase;
361 __be16 ccti_timer;
362 u8 trigger_threshold;
363 u8 ccti_min; /* min CCTI for cc table */
364} __packed;
365
366struct ib_cc_congestion_entry_shadow {
367 u8 ccti_increase;
368 u16 ccti_timer;
369 u8 trigger_threshold;
370 u8 ccti_min; /* min CCTI for cc table */
371} __packed;
372
373struct ib_cc_congestion_setting_attr {
374 __be16 port_control;
375 __be16 control_map;
376 struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
377} __packed;
378
379struct ib_cc_congestion_setting_attr_shadow {
380 u16 port_control;
381 u16 control_map;
382 struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
383} __packed;
384
385#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
386#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
387
388/* 64 Congestion Control table entries in a single MAD */
389#define IB_CCT_ENTRIES 64
390#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
391
392struct ib_cc_table_entry {
393 __be16 entry; /* shift:2, multiplier:14 */
394};
395
396struct ib_cc_table_entry_shadow {
397 u16 entry; /* shift:2, multiplier:14 */
398};
399
400struct ib_cc_table_attr {
401 __be16 ccti_limit; /* max CCTI for cc table */
402 struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
403} __packed;
404
405struct ib_cc_table_attr_shadow {
406 u16 ccti_limit; /* max CCTI for cc table */
407 struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
408} __packed;
409
410#define CC_TABLE_SHADOW_MAX \
411 (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
412
413struct cc_table_shadow {
414 u16 ccti_last_entry;
415 struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
416} __packed;
417
418#endif /* _QIB_MAD_H */
419/*
226 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields 420 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
227 * which specify the N'th counter's capabilities. See ch. 16.1.3.2. 421 * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
228 * We support 5 counters which only count the mandatory quantities. 422 * We support 5 counters which only count the mandatory quantities.
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index dd9cd49d0979..ae78305b59d0 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. 2 * Copyright (c) 2012 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -33,6 +34,7 @@
33#include <linux/ctype.h> 34#include <linux/ctype.h>
34 35
35#include "qib.h" 36#include "qib.h"
37#include "qib_mad.h"
36 38
37/** 39/**
38 * qib_parse_ushort - parse an unsigned short value in an arbitrary base 40 * qib_parse_ushort - parse an unsigned short value in an arbitrary base
@@ -231,6 +233,98 @@ static struct attribute *port_default_attributes[] = {
231 NULL 233 NULL
232}; 234};
233 235
236/*
237 * Start of per-port congestion control structures and support code
238 */
239
240/*
241 * Congestion control table size followed by table entries
242 */
243static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
244 struct bin_attribute *bin_attr,
245 char *buf, loff_t pos, size_t count)
246{
247 int ret;
248 struct qib_pportdata *ppd =
249 container_of(kobj, struct qib_pportdata, pport_cc_kobj);
250
251 if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
252 return -EINVAL;
253
254 ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
255 + sizeof(__be16);
256
257 if (pos > ret)
258 return -EINVAL;
259
260 if (count > ret - pos)
261 count = ret - pos;
262
263 if (!count)
264 return count;
265
266 spin_lock(&ppd->cc_shadow_lock);
267 memcpy(buf, ppd->ccti_entries_shadow, count);
268 spin_unlock(&ppd->cc_shadow_lock);
269
270 return count;
271}
272
/* kobject release callback for the per-port kobjects; intentionally empty. */
static void qib_port_release(struct kobject *kobj)
{
	/* nothing to do since memory is freed by qib_free_devdata() */
}
277
278static struct kobj_type qib_port_cc_ktype = {
279 .release = qib_port_release,
280};
281
282static struct bin_attribute cc_table_bin_attr = {
283 .attr = {.name = "cc_table_bin", .mode = 0444},
284 .read = read_cc_table_bin,
285 .size = PAGE_SIZE,
286};
287
288/*
289 * Congestion settings: port control, control map and an array of 16
290 * entries for the congestion entries - increase, timer, event log
291 * trigger threshold and the minimum injection rate delay.
292 */
293static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
294 struct bin_attribute *bin_attr,
295 char *buf, loff_t pos, size_t count)
296{
297 int ret;
298 struct qib_pportdata *ppd =
299 container_of(kobj, struct qib_pportdata, pport_cc_kobj);
300
301 if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
302 return -EINVAL;
303
304 ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
305
306 if (pos > ret)
307 return -EINVAL;
308 if (count > ret - pos)
309 count = ret - pos;
310
311 if (!count)
312 return count;
313
314 spin_lock(&ppd->cc_shadow_lock);
315 memcpy(buf, ppd->congestion_entries_shadow, count);
316 spin_unlock(&ppd->cc_shadow_lock);
317
318 return count;
319}
320
321static struct bin_attribute cc_setting_bin_attr = {
322 .attr = {.name = "cc_settings_bin", .mode = 0444},
323 .read = read_cc_setting_bin,
324 .size = PAGE_SIZE,
325};
326
327
234static ssize_t qib_portattr_show(struct kobject *kobj, 328static ssize_t qib_portattr_show(struct kobject *kobj,
235 struct attribute *attr, char *buf) 329 struct attribute *attr, char *buf)
236{ 330{
@@ -253,10 +347,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
253 return pattr->store(ppd, buf, len); 347 return pattr->store(ppd, buf, len);
254} 348}
255 349
256static void qib_port_release(struct kobject *kobj)
257{
258 /* nothing to do since memory is freed by qib_free_devdata() */
259}
260 350
261static const struct sysfs_ops qib_port_ops = { 351static const struct sysfs_ops qib_port_ops = {
262 .show = qib_portattr_show, 352 .show = qib_portattr_show,
@@ -670,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
670 if (ret) { 760 if (ret) {
671 qib_dev_err(dd, "Skipping sl2vl sysfs info, " 761 qib_dev_err(dd, "Skipping sl2vl sysfs info, "
672 "(err %d) port %u\n", ret, port_num); 762 "(err %d) port %u\n", ret, port_num);
673 goto bail_sl; 763 goto bail_link;
674 } 764 }
675 kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); 765 kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
676 766
@@ -679,15 +769,57 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
679 if (ret) { 769 if (ret) {
680 qib_dev_err(dd, "Skipping diag_counters sysfs info, " 770 qib_dev_err(dd, "Skipping diag_counters sysfs info, "
681 "(err %d) port %u\n", ret, port_num); 771 "(err %d) port %u\n", ret, port_num);
682 goto bail_diagc; 772 goto bail_sl;
683 } 773 }
684 kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); 774 kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
685 775
776 if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
777 return 0;
778
779 ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
780 kobj, "CCMgtA");
781 if (ret) {
782 qib_dev_err(dd,
783 "Skipping Congestion Control sysfs info, (err %d) port %u\n",
784 ret, port_num);
785 goto bail_diagc;
786 }
787
788 kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
789
790 ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
791 &cc_setting_bin_attr);
792 if (ret) {
793 qib_dev_err(dd,
794 "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
795 ret, port_num);
796 goto bail_cc;
797 }
798
799 ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
800 &cc_table_bin_attr);
801 if (ret) {
802 qib_dev_err(dd,
803 "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
804 ret, port_num);
805 goto bail_cc_entry_bin;
806 }
807
808 qib_devinfo(dd->pcidev,
809 "IB%u: Congestion Control Agent enabled for port %d\n",
810 dd->unit, port_num);
811
686 return 0; 812 return 0;
687 813
814bail_cc_entry_bin:
815 sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
816bail_cc:
817 kobject_put(&ppd->pport_cc_kobj);
688bail_diagc: 818bail_diagc:
689 kobject_put(&ppd->sl2vl_kobj); 819 kobject_put(&ppd->diagc_kobj);
690bail_sl: 820bail_sl:
821 kobject_put(&ppd->sl2vl_kobj);
822bail_link:
691 kobject_put(&ppd->pport_kobj); 823 kobject_put(&ppd->pport_kobj);
692bail: 824bail:
693 return ret; 825 return ret;
@@ -720,7 +852,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
720 852
721 for (i = 0; i < dd->num_pports; i++) { 853 for (i = 0; i < dd->num_pports; i++) {
722 ppd = &dd->pport[i]; 854 ppd = &dd->pport[i];
723 kobject_put(&ppd->pport_kobj); 855 if (qib_cc_table_size &&
856 ppd->congestion_entries_shadow) {
857 sysfs_remove_bin_file(&ppd->pport_cc_kobj,
858 &cc_setting_bin_attr);
859 sysfs_remove_bin_file(&ppd->pport_cc_kobj,
860 &cc_table_bin_attr);
861 kobject_put(&ppd->pport_cc_kobj);
862 }
724 kobject_put(&ppd->sl2vl_kobj); 863 kobject_put(&ppd->sl2vl_kobj);
864 kobject_put(&ppd->pport_kobj);
725 } 865 }
726} 866}