aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobin Holt <holt@sgi.com>2012-08-21 19:16:02 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-08-21 19:45:03 -0400
commit7838f994b4fceff24c343f4e26a6cf4393869579 (patch)
tree42f62833ebc44bf8160aaa7ee95f83e2d883983c
parentc3a5ce0416b6c172a23bc8a3760d8704d3d1535b (diff)
drivers/misc/sgi-xp/xpc_uv.c: SGI XPC fails to load when cpu 0 is out of IRQ resources
On many of our larger systems, CPU 0 has had all of its IRQ resources consumed before XPC loads. Worst cases on machines with multiple 10 GigE cards and multiple IB cards have depleted the entire first socket of IRQs. This patch makes selecting the node upon which IRQs are allocated (as well as all the other GRU Message Queue structures) specifiable as a module load param and has a default behavior of searching all nodes/cpus for an available resources. [akpm@linux-foundation.org: fix build: include cpu.h and module.h] Signed-off-by: Robin Holt <holt@sgi.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c84
1 files changed, 65 insertions, 19 deletions
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 87b251ab6ec5..b9e2000969f0 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -18,6 +18,8 @@
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/cpu.h>
22#include <linux/module.h>
21#include <linux/err.h> 23#include <linux/err.h>
22#include <linux/slab.h> 24#include <linux/slab.h>
23#include <asm/uv/uv_hub.h> 25#include <asm/uv/uv_hub.h>
@@ -59,6 +61,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
59 XPC_NOTIFY_MSG_SIZE_UV) 61 XPC_NOTIFY_MSG_SIZE_UV)
60#define XPC_NOTIFY_IRQ_NAME "xpc_notify" 62#define XPC_NOTIFY_IRQ_NAME "xpc_notify"
61 63
64static int xpc_mq_node = -1;
65
62static struct xpc_gru_mq_uv *xpc_activate_mq_uv; 66static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
63static struct xpc_gru_mq_uv *xpc_notify_mq_uv; 67static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
64 68
@@ -109,11 +113,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
109#if defined CONFIG_X86_64 113#if defined CONFIG_X86_64
110 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, 114 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
111 UV_AFFINITY_CPU); 115 UV_AFFINITY_CPU);
112 if (mq->irq < 0) { 116 if (mq->irq < 0)
113 dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
114 -mq->irq);
115 return mq->irq; 117 return mq->irq;
116 }
117 118
118 mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); 119 mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
119 120
@@ -238,8 +239,9 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
238 mq->mmr_blade = uv_cpu_to_blade_id(cpu); 239 mq->mmr_blade = uv_cpu_to_blade_id(cpu);
239 240
240 nid = cpu_to_node(cpu); 241 nid = cpu_to_node(cpu);
241 page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 242 page = alloc_pages_exact_node(nid,
242 pg_order); 243 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
244 pg_order);
243 if (page == NULL) { 245 if (page == NULL) {
244 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " 246 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
245 "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); 247 "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
@@ -1731,9 +1733,50 @@ static struct xpc_arch_operations xpc_arch_ops_uv = {
1731 .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, 1733 .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
1732}; 1734};
1733 1735
1736static int
1737xpc_init_mq_node(int nid)
1738{
1739 int cpu;
1740
1741 get_online_cpus();
1742
1743 for_each_cpu(cpu, cpumask_of_node(nid)) {
1744 xpc_activate_mq_uv =
1745 xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
1746 XPC_ACTIVATE_IRQ_NAME,
1747 xpc_handle_activate_IRQ_uv);
1748 if (!IS_ERR(xpc_activate_mq_uv))
1749 break;
1750 }
1751 if (IS_ERR(xpc_activate_mq_uv)) {
1752 put_online_cpus();
1753 return PTR_ERR(xpc_activate_mq_uv);
1754 }
1755
1756 for_each_cpu(cpu, cpumask_of_node(nid)) {
1757 xpc_notify_mq_uv =
1758 xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
1759 XPC_NOTIFY_IRQ_NAME,
1760 xpc_handle_notify_IRQ_uv);
1761 if (!IS_ERR(xpc_notify_mq_uv))
1762 break;
1763 }
1764 if (IS_ERR(xpc_notify_mq_uv)) {
1765 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1766 put_online_cpus();
1767 return PTR_ERR(xpc_notify_mq_uv);
1768 }
1769
1770 put_online_cpus();
1771 return 0;
1772}
1773
1734int 1774int
1735xpc_init_uv(void) 1775xpc_init_uv(void)
1736{ 1776{
1777 int nid;
1778 int ret = 0;
1779
1737 xpc_arch_ops = xpc_arch_ops_uv; 1780 xpc_arch_ops = xpc_arch_ops_uv;
1738 1781
1739 if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { 1782 if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
@@ -1742,21 +1785,21 @@ xpc_init_uv(void)
1742 return -E2BIG; 1785 return -E2BIG;
1743 } 1786 }
1744 1787
1745 xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 1788 if (xpc_mq_node < 0)
1746 XPC_ACTIVATE_IRQ_NAME, 1789 for_each_online_node(nid) {
1747 xpc_handle_activate_IRQ_uv); 1790 ret = xpc_init_mq_node(nid);
1748 if (IS_ERR(xpc_activate_mq_uv))
1749 return PTR_ERR(xpc_activate_mq_uv);
1750 1791
1751 xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 1792 if (!ret)
1752 XPC_NOTIFY_IRQ_NAME, 1793 break;
1753 xpc_handle_notify_IRQ_uv); 1794 }
1754 if (IS_ERR(xpc_notify_mq_uv)) { 1795 else
1755 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); 1796 ret = xpc_init_mq_node(xpc_mq_node);
1756 return PTR_ERR(xpc_notify_mq_uv);
1757 }
1758 1797
1759 return 0; 1798 if (ret < 0)
1799 dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
1800 -ret);
1801
1802 return ret;
1760} 1803}
1761 1804
1762void 1805void
@@ -1765,3 +1808,6 @@ xpc_exit_uv(void)
1765 xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); 1808 xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1766 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); 1809 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1767} 1810}
1811
1812module_param(xpc_mq_node, int, 0);
1813MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");