aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMichael Ellerman <michael@ellerman.id.au>2009-02-16 19:21:56 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-02-22 23:53:03 -0500
commit448e2ca0e32a5c437650d634b6032ab732662338 (patch)
treea392c48604a7dfb09a6d68b9152d2a2ea205ff97 /arch
parentd523cc379da57f1c39f5db9c47bdaa94f74727ff (diff)
powerpc/pseries: Implement a quota system for MSIs
There are hardware limitations on the number of available MSIs, which firmware expresses using a property named "ibm,pe-total-#msi". This property tells us how many MSIs are available for devices below the point in the PCI tree where we find the property. For old firmwares which don't have the property, we assume there are 8 MSIs available per "partitionable endpoint" (PE). The PE can be found using existing EEH code, which uses the methods described in PAPR. For our purposes we want the parent of the node that's identified using this method. When a driver requests n MSIs for a device, we first establish where the "ibm,pe-total-#msi" property above that device is, or we find the PE if the property is not found. In both cases we call this node the "pe_dn". We then count all non-bridge devices below the pe_dn, to establish how many devices in total may need MSIs. The quota is then simply the total available divided by the number of devices, if the request is less than or equal to the quota, the request is fine and we're done. If the request is greater than the quota, we try to determine if there are any "spare" MSIs which we can give to this device. Spare MSIs are found by looking for other devices which can never use their full quota, because their "req#msi(-x)" property is less than the quota. If we find any spare, we divide the spares by the number of devices that could request more than their quota. This ensures the spare MSIs are spread evenly amongst all over-quota requestors. Signed-off-by: Michael Ellerman <michael@ellerman.id.au> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/platforms/pseries/msi.c178
1 files changed, 176 insertions, 2 deletions
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 081af6d7fa0..3e0d6ef3eca 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -174,12 +174,186 @@ static int check_req_msix(struct pci_dev *pdev, int nvec)
174 return check_req(pdev, nvec, "ibm,req#msi-x"); 174 return check_req(pdev, nvec, "ibm,req#msi-x");
175} 175}
176 176
177/* Quota calculation */
178
179static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
180{
181 struct device_node *dn;
182 const u32 *p;
183
184 dn = of_node_get(pci_device_to_OF_node(dev));
185 while (dn) {
186 p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
187 if (p) {
188 pr_debug("rtas_msi: found prop on dn %s\n",
189 dn->full_name);
190 *total = *p;
191 return dn;
192 }
193
194 dn = of_get_next_parent(dn);
195 }
196
197 return NULL;
198}
199
200static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
201{
202 struct device_node *dn;
203
204 /* Found our PE and assume 8 at that point. */
205
206 dn = pci_device_to_OF_node(dev);
207 if (!dn)
208 return NULL;
209
210 dn = find_device_pe(dn);
211 if (!dn)
212 return NULL;
213
214 /* We actually want the parent */
215 dn = of_get_parent(dn);
216 if (!dn)
217 return NULL;
218
219 /* Hardcode of 8 for old firmwares */
220 *total = 8;
221 pr_debug("rtas_msi: using PE dn %s\n", dn->full_name);
222
223 return dn;
224}
225
226struct msi_counts {
227 struct device_node *requestor;
228 int num_devices;
229 int request;
230 int quota;
231 int spare;
232 int over_quota;
233};
234
235static void *count_non_bridge_devices(struct device_node *dn, void *data)
236{
237 struct msi_counts *counts = data;
238 const u32 *p;
239 u32 class;
240
241 pr_debug("rtas_msi: counting %s\n", dn->full_name);
242
243 p = of_get_property(dn, "class-code", NULL);
244 class = p ? *p : 0;
245
246 if ((class >> 8) != PCI_CLASS_BRIDGE_PCI)
247 counts->num_devices++;
248
249 return NULL;
250}
251
252static void *count_spare_msis(struct device_node *dn, void *data)
253{
254 struct msi_counts *counts = data;
255 const u32 *p;
256 int req;
257
258 if (dn == counts->requestor)
259 req = counts->request;
260 else {
261 /* We don't know if a driver will try to use MSI or MSI-X,
262 * so we just have to punt and use the larger of the two. */
263 req = 0;
264 p = of_get_property(dn, "ibm,req#msi", NULL);
265 if (p)
266 req = *p;
267
268 p = of_get_property(dn, "ibm,req#msi-x", NULL);
269 if (p)
270 req = max(req, (int)*p);
271 }
272
273 if (req < counts->quota)
274 counts->spare += counts->quota - req;
275 else if (req > counts->quota)
276 counts->over_quota++;
277
278 return NULL;
279}
280
281static int msi_quota_for_device(struct pci_dev *dev, int request)
282{
283 struct device_node *pe_dn;
284 struct msi_counts counts;
285 int total;
286
287 pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev),
288 request);
289
290 pe_dn = find_pe_total_msi(dev, &total);
291 if (!pe_dn)
292 pe_dn = find_pe_dn(dev, &total);
293
294 if (!pe_dn) {
295 pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev));
296 goto out;
297 }
298
299 pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name);
300
301 memset(&counts, 0, sizeof(struct msi_counts));
302
303 /* Work out how many devices we have below this PE */
304 traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts);
305
306 if (counts.num_devices == 0) {
307 pr_err("rtas_msi: found 0 devices under PE for %s\n",
308 pci_name(dev));
309 goto out;
310 }
311
312 counts.quota = total / counts.num_devices;
313 if (request <= counts.quota)
314 goto out;
315
316 /* else, we have some more calculating to do */
317 counts.requestor = pci_device_to_OF_node(dev);
318 counts.request = request;
319 traverse_pci_devices(pe_dn, count_spare_msis, &counts);
320
321 /* If the quota isn't an integer multiple of the total, we can
322 * use the remainder as spare MSIs for anyone that wants them. */
323 counts.spare += total % counts.num_devices;
324
325 /* Divide any spare by the number of over-quota requestors */
326 if (counts.over_quota)
327 counts.quota += counts.spare / counts.over_quota;
328
329 /* And finally clamp the request to the possibly adjusted quota */
330 request = min(counts.quota, request);
331
332 pr_debug("rtas_msi: request clamped to quota %d\n", request);
333out:
334 of_node_put(pe_dn);
335
336 return request;
337}
338
177static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type) 339static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type)
178{ 340{
341 int quota, rc;
342
179 if (type == PCI_CAP_ID_MSIX) 343 if (type == PCI_CAP_ID_MSIX)
180 return check_req_msix(pdev, nvec); 344 rc = check_req_msix(pdev, nvec);
345 else
346 rc = check_req_msi(pdev, nvec);
347
348 if (rc)
349 return rc;
181 350
182 return check_req_msi(pdev, nvec); 351 quota = msi_quota_for_device(pdev, nvec);
352
353 if (quota && quota < nvec)
354 return quota;
355
356 return 0;
183} 357}
184 358
185static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) 359static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)