Diffstat (limited to 'drivers')
-rw-r--r--  drivers/acpi/thermal.c | 104
-rw-r--r--  drivers/infiniband/Kconfig | 7
-rw-r--r--  drivers/infiniband/Makefile | 1
-rw-r--r--  drivers/infiniband/core/Makefile | 4
-rw-r--r--  drivers/infiniband/core/device.c | 2
-rw-r--r--  drivers/infiniband/core/umem.c (renamed from drivers/infiniband/core/uverbs_mem.c) | 153
-rw-r--r--  drivers/infiniband/core/uverbs.h | 6
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 60
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 11
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.c | 42
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 28
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_iverbs.h | 3
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c | 69
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c | 38
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx4/Kconfig | 9
-rw-r--r--  drivers/infiniband/hw/mlx4/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 100
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 525
-rw-r--r--  drivers/infiniband/hw/mlx4/doorbell.c | 216
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 339
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 651
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 285
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 184
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 1294
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c | 334
-rw-r--r--  drivers/infiniband/hw/mlx4/user.h | 92
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 38
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h | 1
-rw-r--r--  drivers/net/Kconfig | 14
-rw-r--r--  drivers/net/Makefile | 1
-rw-r--r--  drivers/net/mlx4/Makefile | 4
-rw-r--r--  drivers/net/mlx4/alloc.c | 179
-rw-r--r--  drivers/net/mlx4/catas.c | 70
-rw-r--r--  drivers/net/mlx4/cmd.c | 429
-rw-r--r--  drivers/net/mlx4/cq.c | 254
-rw-r--r--  drivers/net/mlx4/eq.c | 696
-rw-r--r--  drivers/net/mlx4/fw.c | 775
-rw-r--r--  drivers/net/mlx4/fw.h | 167
-rw-r--r--  drivers/net/mlx4/icm.c | 379
-rw-r--r--  drivers/net/mlx4/icm.h | 135
-rw-r--r--  drivers/net/mlx4/intf.c | 165
-rw-r--r--  drivers/net/mlx4/main.c | 936
-rw-r--r--  drivers/net/mlx4/mcg.c | 380
-rw-r--r--  drivers/net/mlx4/mlx4.h | 348
-rw-r--r--  drivers/net/mlx4/mr.c | 479
-rw-r--r--  drivers/net/mlx4/pd.c | 102
-rw-r--r--  drivers/net/mlx4/profile.c | 238
-rw-r--r--  drivers/net/mlx4/qp.c | 280
-rw-r--r--  drivers/net/mlx4/reset.c | 181
-rw-r--r--  drivers/net/mlx4/srq.c | 227
54 files changed, 10750 insertions(+), 296 deletions(-)
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 589b98b7b216..1ada017d01ef 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -59,8 +59,6 @@
 #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0
 #define ACPI_THERMAL_NOTIFY_HOT 0xF1
 #define ACPI_THERMAL_MODE_ACTIVE 0x00
-#define ACPI_THERMAL_MODE_PASSIVE 0x01
-#define ACPI_THERMAL_MODE_CRITICAL 0xff
 #define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff"
 
 #define ACPI_THERMAL_MAX_ACTIVE 10
@@ -86,9 +84,6 @@ static int acpi_thermal_resume(struct acpi_device *device);
 static int acpi_thermal_state_open_fs(struct inode *inode, struct file *file);
 static int acpi_thermal_temp_open_fs(struct inode *inode, struct file *file);
 static int acpi_thermal_trip_open_fs(struct inode *inode, struct file *file);
-static ssize_t acpi_thermal_write_trip_points(struct file *,
-					      const char __user *, size_t,
-					      loff_t *);
 static int acpi_thermal_cooling_open_fs(struct inode *inode, struct file *file);
 static ssize_t acpi_thermal_write_cooling_mode(struct file *,
 					       const char __user *, size_t,
@@ -167,7 +162,6 @@ struct acpi_thermal {
 	unsigned long temperature;
 	unsigned long last_temperature;
 	unsigned long polling_frequency;
-	u8 cooling_mode;
 	volatile u8 zombie;
 	struct acpi_thermal_flags flags;
 	struct acpi_thermal_state state;
@@ -193,7 +187,6 @@ static const struct file_operations acpi_thermal_temp_fops = {
 static const struct file_operations acpi_thermal_trip_fops = {
 	.open = acpi_thermal_trip_open_fs,
 	.read = seq_read,
-	.write = acpi_thermal_write_trip_points,
 	.llseek = seq_lseek,
 	.release = single_release,
 };
@@ -297,11 +290,6 @@ static int acpi_thermal_set_cooling_mode(struct acpi_thermal *tz, int mode)
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	tz->cooling_mode = mode;
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Cooling mode [%s]\n",
-			  mode ? "passive" : "active"));
-
 	return 0;
 }
 
@@ -889,67 +877,6 @@ static int acpi_thermal_trip_open_fs(struct inode *inode, struct file *file)
 	return single_open(file, acpi_thermal_trip_seq_show, PDE(inode)->data);
 }
 
-static ssize_t
-acpi_thermal_write_trip_points(struct file *file,
-			       const char __user * buffer,
-			       size_t count, loff_t * ppos)
-{
-	struct seq_file *m = file->private_data;
-	struct acpi_thermal *tz = m->private;
-
-	char *limit_string;
-	int num, critical, hot, passive;
-	int *active;
-	int i = 0;
-
-
-	limit_string = kzalloc(ACPI_THERMAL_MAX_LIMIT_STR_LEN, GFP_KERNEL);
-	if (!limit_string)
-		return -ENOMEM;
-
-	active = kmalloc(ACPI_THERMAL_MAX_ACTIVE * sizeof(int), GFP_KERNEL);
-	if (!active) {
-		kfree(limit_string);
-		return -ENOMEM;
-	}
-
-	if (!tz || (count > ACPI_THERMAL_MAX_LIMIT_STR_LEN - 1)) {
-		count = -EINVAL;
-		goto end;
-	}
-
-	if (copy_from_user(limit_string, buffer, count)) {
-		count = -EFAULT;
-		goto end;
-	}
-
-	limit_string[count] = '\0';
-
-	num = sscanf(limit_string, "%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
-		     &critical, &hot, &passive,
-		     &active[0], &active[1], &active[2], &active[3], &active[4],
-		     &active[5], &active[6], &active[7], &active[8],
-		     &active[9]);
-	if (!(num >= 5 && num < (ACPI_THERMAL_MAX_ACTIVE + 3))) {
-		count = -EINVAL;
-		goto end;
-	}
-
-	tz->trips.critical.temperature = CELSIUS_TO_KELVIN(critical);
-	tz->trips.hot.temperature = CELSIUS_TO_KELVIN(hot);
-	tz->trips.passive.temperature = CELSIUS_TO_KELVIN(passive);
-	for (i = 0; i < num - 3; i++) {
-		if (!(tz->trips.active[i].flags.valid))
-			break;
-		tz->trips.active[i].temperature = CELSIUS_TO_KELVIN(active[i]);
-	}
-
-      end:
-	kfree(active);
-	kfree(limit_string);
-	return count;
-}
-
 static int acpi_thermal_cooling_seq_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_thermal *tz = seq->private;
@@ -958,15 +885,10 @@ static int acpi_thermal_cooling_seq_show(struct seq_file *seq, void *offset)
 	if (!tz)
 		goto end;
 
-	if (!tz->flags.cooling_mode) {
+	if (!tz->flags.cooling_mode)
 		seq_puts(seq, "<setting not supported>\n");
-	}
-
-	if (tz->cooling_mode == ACPI_THERMAL_MODE_CRITICAL)
-		seq_printf(seq, "cooling mode: critical\n");
 	else
-		seq_printf(seq, "cooling mode: %s\n",
-			   tz->cooling_mode ? "passive" : "active");
+		seq_puts(seq, "0 - Active; 1 - Passive\n");
 
       end:
	return 0;
@@ -1223,28 +1145,6 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz)
 	result = acpi_thermal_set_cooling_mode(tz, ACPI_THERMAL_MODE_ACTIVE);
 	if (!result)
 		tz->flags.cooling_mode = 1;
-	else {
-		/* Oh,we have not _SCP method.
-		   Generally show cooling_mode by _ACx, _PSV,spec 12.2 */
-		tz->flags.cooling_mode = 0;
-		if (tz->trips.active[0].flags.valid
-		    && tz->trips.passive.flags.valid) {
-			if (tz->trips.passive.temperature >
-			    tz->trips.active[0].temperature)
-				tz->cooling_mode = ACPI_THERMAL_MODE_ACTIVE;
-			else
-				tz->cooling_mode = ACPI_THERMAL_MODE_PASSIVE;
-		} else if (!tz->trips.active[0].flags.valid
-			   && tz->trips.passive.flags.valid) {
-			tz->cooling_mode = ACPI_THERMAL_MODE_PASSIVE;
-		} else if (tz->trips.active[0].flags.valid
-			   && !tz->trips.passive.flags.valid) {
-			tz->cooling_mode = ACPI_THERMAL_MODE_ACTIVE;
-		} else {
-			/* _ACx and _PSV are optional, but _CRT is required */
-			tz->cooling_mode = ACPI_THERMAL_MODE_CRITICAL;
-		}
-	}
 
 	/* Get default polling frequency [_TZP] (optional) */
 	if (tzp)
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66b36de9fa6f..37deaae49190 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openib.org>.
 
+config INFINIBAND_USER_MEM
+	bool
+	depends on INFINIBAND_USER_ACCESS != n
+	default y
+
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INFINIBAND && INET
@@ -40,6 +45,8 @@ source "drivers/infiniband/hw/ehca/Kconfig"
 source "drivers/infiniband/hw/amso1100/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 
+source "drivers/infiniband/hw/mlx4/Kconfig"
+
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
 source "drivers/infiniband/ulp/srp/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index da2066c4f22c..75f325e40b54 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
 obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
 obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
+obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
 obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4b9b17..cb1ab3ea4998 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
 
 ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
 		device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 
 ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
 
@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
 
 ib_ucm-y := ucm.o
 
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \
-		uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb425b033..592c90aa3183 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
 {
 	ib_cache_cleanup();
 	ib_sysfs_cleanup();
+	/* Make sure that any pending umem accounting work is done. */
+	flush_scheduled_work();
 }
 
 module_init(ib_core_init);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/umem.c
index c95fe952abd5..f32ca5fbb26b 100644
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,13 +39,6 @@
 
 #include "uverbs.h"
 
-struct ib_umem_account_work {
-	struct work_struct work;
-	struct mm_struct *mm;
-	unsigned long diff;
-};
-
-
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
 	struct ib_umem_chunk *chunk, *tmp;
@@ -64,35 +57,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 	}
 }
 
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
-		void *addr, size_t size, int write)
+/**
+ * ib_umem_get - Pin and DMA map userspace memory.
+ * @context: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ */
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+			    size_t size, int access)
 {
+	struct ib_umem *umem;
 	struct page **page_list;
 	struct ib_umem_chunk *chunk;
 	unsigned long locked;
 	unsigned long lock_limit;
 	unsigned long cur_base;
 	unsigned long npages;
-	int ret = 0;
+	int ret;
 	int off;
 	int i;
 
 	if (!can_do_mlock())
-		return -EPERM;
+		return ERR_PTR(-EPERM);
 
-	page_list = (struct page **) __get_free_page(GFP_KERNEL);
-	if (!page_list)
-		return -ENOMEM;
+	umem = kmalloc(sizeof *umem, GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->context = context;
+	umem->length = size;
+	umem->offset = addr & ~PAGE_MASK;
+	umem->page_size = PAGE_SIZE;
+	/*
+	 * We ask for writable memory if any access flags other than
+	 * "remote read" are set. "Local write" and "remote write"
+	 * obviously require write access. "Remote atomic" can do
+	 * things like fetch and add, which will modify memory, and
+	 * "MW bind" can change permissions by binding a window.
+	 */
+	umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
 
-	mem->user_base = (unsigned long) addr;
-	mem->length = size;
-	mem->offset = (unsigned long) addr & ~PAGE_MASK;
-	mem->page_size = PAGE_SIZE;
-	mem->writable = write;
+	INIT_LIST_HEAD(&umem->chunk_list);
 
-	INIT_LIST_HEAD(&mem->chunk_list);
+	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+	if (!page_list) {
+		kfree(umem);
+		return ERR_PTR(-ENOMEM);
+	}
 
-	npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
+	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
 
 	down_write(&current->mm->mmap_sem);
 
@@ -104,13 +118,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 		goto out;
 	}
 
-	cur_base = (unsigned long) addr & PAGE_MASK;
+	cur_base = addr & PAGE_MASK;
 
 	while (npages) {
 		ret = get_user_pages(current, current->mm, cur_base,
 				     min_t(int, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
-				     1, !write, page_list, NULL);
+				     1, !umem->writable, page_list, NULL);
 
 		if (ret < 0)
 			goto out;
@@ -136,7 +150,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 			chunk->page_list[i].length = PAGE_SIZE;
 		}
 
-		chunk->nmap = ib_dma_map_sg(dev,
+		chunk->nmap = ib_dma_map_sg(context->device,
 					    &chunk->page_list[0],
 					    chunk->nents,
 					    DMA_BIDIRECTIONAL);
@@ -151,75 +165,94 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 
 			ret -= chunk->nents;
 			off += chunk->nents;
-			list_add_tail(&chunk->list, &mem->chunk_list);
+			list_add_tail(&chunk->list, &umem->chunk_list);
 		}
 
 		ret = 0;
 	}
 
 out:
-	if (ret < 0)
-		__ib_umem_release(dev, mem, 0);
-	else
+	if (ret < 0) {
+		__ib_umem_release(context->device, umem, 0);
+		kfree(umem);
+	} else
 		current->mm->locked_vm = locked;
 
 	up_write(&current->mm->mmap_sem);
 	free_page((unsigned long) page_list);
 
-	return ret;
+	return ret < 0 ? ERR_PTR(ret) : umem;
 }
+EXPORT_SYMBOL(ib_umem_get);
 
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
+static void ib_umem_account(struct work_struct *work)
 {
-	__ib_umem_release(dev, umem, 1);
-
-	down_write(&current->mm->mmap_sem);
-	current->mm->locked_vm -=
-		PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
-	up_write(&current->mm->mmap_sem);
-}
-
-static void ib_umem_account(struct work_struct *_work)
-{
-	struct ib_umem_account_work *work =
-		container_of(_work, struct ib_umem_account_work, work);
-
-	down_write(&work->mm->mmap_sem);
-	work->mm->locked_vm -= work->diff;
-	up_write(&work->mm->mmap_sem);
-	mmput(work->mm);
-	kfree(work);
+	struct ib_umem *umem = container_of(work, struct ib_umem, work);
+
+	down_write(&umem->mm->mmap_sem);
+	umem->mm->locked_vm -= umem->diff;
+	up_write(&umem->mm->mmap_sem);
+	mmput(umem->mm);
+	kfree(umem);
 }
 
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
+/**
+ * ib_umem_release - release memory pinned with ib_umem_get
+ * @umem: umem struct to release
+ */
+void ib_umem_release(struct ib_umem *umem)
 {
-	struct ib_umem_account_work *work;
+	struct ib_ucontext *context = umem->context;
 	struct mm_struct *mm;
+	unsigned long diff;
 
-	__ib_umem_release(dev, umem, 1);
+	__ib_umem_release(umem->context->device, umem, 1);
 
 	mm = get_task_mm(current);
 	if (!mm)
 		return;
 
+	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+
 	/*
 	 * We may be called with the mm's mmap_sem already held. This
 	 * can happen when a userspace munmap() is the call that drops
 	 * the last reference to our file and calls our release
 	 * method. If there are memory regions to destroy, we'll end
-	 * up here and not be able to take the mmap_sem. Therefore we
-	 * defer the vm_locked accounting to the system workqueue.
+	 * up here and not be able to take the mmap_sem. In that case
+	 * we defer the vm_locked accounting to the system workqueue.
 	 */
+	if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
+		INIT_WORK(&umem->work, ib_umem_account);
+		umem->mm = mm;
+		umem->diff = diff;
 
-	work = kmalloc(sizeof *work, GFP_KERNEL);
-	if (!work) {
-		mmput(mm);
+		schedule_work(&umem->work);
 		return;
-	}
+	} else
+		down_write(&mm->mmap_sem);
+
+	current->mm->locked_vm -= diff;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	kfree(umem);
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+	struct ib_umem_chunk *chunk;
+	int shift;
+	int i;
+	int n;
+
+	shift = ilog2(umem->page_size);
 
-	INIT_WORK(&work->work, ib_umem_account);
-	work->mm = mm;
-	work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+	n = 0;
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (i = 0; i < chunk->nmap; ++i)
+			n += sg_dma_len(&chunk->page_list[i]) >> shift;
 
-	schedule_work(&work->work);
+	return n;
 }
+EXPORT_SYMBOL(ib_umem_page_count);
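
Note on the new interface: after this change a low-level driver pins and maps user memory itself instead of receiving a pre-pinned region from uverbs, and ib_umem_get() reports failure through the ERR_PTR() convention rather than an int return plus out-parameter. A minimal sketch of a driver-side reg_user_mr method built on the calls exported above (struct my_mr and my_hw_map_pages() are hypothetical placeholders; ib_umem_get(), ib_umem_page_count() and ib_umem_release() are the functions introduced in this file):

	/* Sketch only: my_mr and my_hw_map_pages() are hypothetical. */
	struct my_mr {
		struct ib_mr    ibmr;
		struct ib_umem *umem;
	};

	static struct ib_mr *my_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
					    u64 virt, int access,
					    struct ib_udata *udata)
	{
		struct my_mr *mr;
		int err;

		mr = kmalloc(sizeof *mr, GFP_KERNEL);
		if (!mr)
			return ERR_PTR(-ENOMEM);

		/* Pin the user range and DMA-map it; returns ERR_PTR() on failure. */
		mr->umem = ib_umem_get(pd->uobject->context, start, length, access);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			kfree(mr);
			return ERR_PTR(err);
		}

		/* Program the HCA with the mapped pages (device specific). */
		err = my_hw_map_pages(pd, mr, virt, ib_umem_page_count(mr->umem));
		if (err) {
			ib_umem_release(mr->umem);	/* unpin and unaccount */
			kfree(mr);
			return ERR_PTR(err);
		}

		return &mr->ibmr;
	}

This is the same shape followed by each converted driver below: allocate the private MR struct, call ib_umem_get(), program the hardware, and release the umem on any error path and again at deregistration time.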
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c033ff..c33546f9e961 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
 #include <linux/completion.h>
 
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
 /*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
 
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
-		void *addr, size_t size, int write);
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
-
 #define IB_UVERBS_DECLARE_CMD(name) \
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
 				 const char __user *buf, int in_len, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bab66769be14..01d70084aebe 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
  * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
  *
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	INIT_LIST_HEAD(&ucontext->qp_list);
 	INIT_LIST_HEAD(&ucontext->srq_list);
 	INIT_LIST_HEAD(&ucontext->ah_list);
+	ucontext->closing = 0;
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
 
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	struct ib_uverbs_reg_mr cmd;
 	struct ib_uverbs_reg_mr_resp resp;
 	struct ib_udata udata;
-	struct ib_umem_object *obj;
+	struct ib_uobject *uobj;
 	struct ib_pd *pd;
 	struct ib_mr *mr;
 	int ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
 		return -EINVAL;
 
-	obj = kmalloc(sizeof *obj, GFP_KERNEL);
-	if (!obj)
+	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+	if (!uobj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key);
-	down_write(&obj->uobject.mutex);
-
-	/*
-	 * We ask for writable memory if any access flags other than
-	 * "remote read" are set. "Local write" and "remote write"
-	 * obviously require write access. "Remote atomic" can do
-	 * things like fetch and add, which will modify memory, and
-	 * "MW bind" can change permissions by binding a window.
-	 */
-	ret = ib_umem_get(file->device->ib_dev, &obj->umem,
-			  (void *) (unsigned long) cmd.start, cmd.length,
-			  !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
-	if (ret)
-		goto err_free;
-
-	obj->umem.virt_base = cmd.hca_va;
+	init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+	down_write(&uobj->mutex);
 
 	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
 	if (!pd) {
 		ret = -EINVAL;
-		goto err_release;
+		goto err_free;
 	}
 
-	mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+				     cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
 		ret = PTR_ERR(mr);
 		goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
 	mr->device = pd->device;
 	mr->pd = pd;
-	mr->uobject = &obj->uobject;
+	mr->uobject = uobj;
 	atomic_inc(&pd->usecnt);
 	atomic_set(&mr->usecnt, 0);
 
-	obj->uobject.object = mr;
-	ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+	uobj->object = mr;
+	ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
 	if (ret)
 		goto err_unreg;
 
 	memset(&resp, 0, sizeof resp);
 	resp.lkey = mr->lkey;
 	resp.rkey = mr->rkey;
-	resp.mr_handle = obj->uobject.id;
+	resp.mr_handle = uobj->id;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	put_pd_read(pd);
 
 	mutex_lock(&file->mutex);
-	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+	list_add_tail(&uobj->list, &file->ucontext->mr_list);
 	mutex_unlock(&file->mutex);
 
-	obj->uobject.live = 1;
+	uobj->live = 1;
 
-	up_write(&obj->uobject.mutex);
+	up_write(&uobj->mutex);
 
 	return in_len;
 
 err_copy:
-	idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+	idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 
 err_unreg:
 	ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ err_unreg:
 err_put:
 	put_pd_read(pd);
 
-err_release:
-	ib_umem_release(file->device->ib_dev, &obj->umem);
-
 err_free:
-	put_uobj_write(&obj->uobject);
+	put_uobj_write(uobj);
 	return ret;
 }
 
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	struct ib_uverbs_dereg_mr cmd;
 	struct ib_mr *mr;
 	struct ib_uobject *uobj;
-	struct ib_umem_object *memobj;
 	int ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -EINVAL;
 
-	memobj = container_of(uobj, struct ib_umem_object, uobject);
-	mr = uobj->object;
+	mr = uobj->object;
 
 	ret = ib_dereg_mr(mr);
 	if (!ret)
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	ib_umem_release(file->device->ib_dev, &memobj->umem);
-
 	put_uobj(uobj);
 
 	return in_len;
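
For reference, reg_user_mr in struct ib_device now takes the raw userspace range rather than a pre-built struct ib_umem. The diffstat above is limited to drivers/, so the include/rdma side of this change is not shown; the prototype below is reconstructed from the driver conversions that follow and should be treated as a sketch:

	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
				     u64 virt_addr, int mr_access_flags,
				     struct ib_udata *udata);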
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d44e54799651..14d7ccd89195 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	if (!context)
 		return 0;
 
+	context->closing = 1;
+
 	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
 		struct ib_ah *ah = uobj->object;
 
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
 		struct ib_mr *mr = uobj->object;
-		struct ib_device *mrdev = mr->device;
-		struct ib_umem_object *memobj;
 
 		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 		ib_dereg_mr(mr);
-
-		memobj = container_of(uobj, struct ib_umem_object, uobject);
-		ib_umem_release_on_close(mrdev, &memobj->umem);
-
-		kfree(memobj);
+		kfree(uobj);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
 	unregister_filesystem(&uverbs_event_fs);
 	class_destroy(uverbs_class);
 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
-	flush_scheduled_work();
 	idr_destroy(&ib_uverbs_pd_idr);
 	idr_destroy(&ib_uverbs_mr_idr);
 	idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 109166223c09..997cf1530762 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
 #include <asm/byteorder.h>
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 #include "c2.h"
 #include "c2_provider.h"
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
 	}
 
 	mr->pd = to_c2pd(ib_pd);
+	mr->umem = NULL;
 	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
 		"*iova_start %llx, first pa %llx, last pa %llx\n",
 		__FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
 	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
 }
 
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				    int acc, struct ib_udata *udata)
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				    u64 virt, int acc, struct ib_udata *udata)
 {
 	u64 *pages;
 	u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	struct c2_mr *c2mr;
 
 	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
-	shift = ffs(region->page_size) - 1;
 
 	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
 	if (!c2mr)
 		return ERR_PTR(-ENOMEM);
 	c2mr->pd = c2pd;
 
+	c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(c2mr->umem)) {
+		err = PTR_ERR(c2mr->umem);
+		kfree(c2mr);
+		return ERR_PTR(err);
+	}
+
+	shift = ffs(c2mr->umem->page_size) - 1;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
 		n += chunk->nents;
 
 	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	}
 
 	i = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list) {
+	list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
 			for (k = 0; k < len; ++k) {
 				pages[i++] =
 					sg_dma_address(&chunk->page_list[j]) +
-					(region->page_size * k);
+					(c2mr->umem->page_size * k);
 			}
 		}
 	}
 
-	kva = (u64)region->virt_base;
+	kva = virt;
 	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
 					 pages,
-					 region->page_size,
+					 c2mr->umem->page_size,
 					 i,
-					 region->length,
-					 region->offset,
+					 length,
+					 c2mr->umem->offset,
 					 &kva,
 					 c2_convert_access(acc),
 					 c2mr);
 	kfree(pages);
-	if (err) {
-		kfree(c2mr);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto err;
 	return &c2mr->ibmr;
 
 err:
+	ib_umem_release(c2mr->umem);
 	kfree(c2mr);
 	return ERR_PTR(err);
 }
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
 	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
 	if (err)
 		pr_debug("c2_stag_dealloc failed: %d\n", err);
-	else
+	else {
+		if (mr->umem)
+			ib_umem_release(mr->umem);
 		kfree(mr);
+	}
 
 	return err;
 }
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc906223220f..1076df2ee96a 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
 struct c2_mr {
 	struct ib_mr ibmr;
 	struct c2_pd *pd;
+	struct ib_umem *umem;
 };
 
 struct c2_av;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index a891493fd340..e7c2c3948037 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
 #include <rdma/iw_cm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
 #include "cxio_hal.h"
@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
 	remove_handle(rhp, &rhp->mmidr, mmid);
 	if (mhp->kva)
 		kfree((void *) (unsigned long) mhp->kva);
+	if (mhp->umem)
+		ib_umem_release(mhp->umem);
 	PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
 	kfree(mhp);
 	return 0;
@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 }
 
 
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				      int acc, struct ib_udata *udata)
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				      u64 virt, int acc, struct ib_udata *udata)
 {
 	__be64 *pages;
 	int shift, n, len;
@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	struct iwch_reg_user_mr_resp uresp;
 
 	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
-	shift = ffs(region->page_size) - 1;
 
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(mhp->umem)) {
+		err = PTR_ERR(mhp->umem);
+		kfree(mhp);
+		return ERR_PTR(err);
+	}
+
+	shift = ffs(mhp->umem->page_size) - 1;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		n += chunk->nents;
 
 	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
 	i = n = 0;
 
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
 			for (k = 0; k < len; ++k) {
 				pages[i++] = cpu_to_be64(sg_dma_address(
 					&chunk->page_list[j]) +
-					region->page_size * k);
+					mhp->umem->page_size * k);
 			}
 		}
 
@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-	mhp->attr.va_fbo = region->virt_base;
+	mhp->attr.va_fbo = virt;
 	mhp->attr.page_size = shift - 12;
-	mhp->attr.len = (u32) region->length;
+	mhp->attr.len = (u32) length;
 	mhp->attr.pbl_size = i;
 	err = iwch_register_mem(rhp, php, mhp, shift, pages);
 	kfree(pages);
@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	return &mhp->ibmr;
 
 err:
+	ib_umem_release(mhp->umem);
 	kfree(mhp);
 	return ERR_PTR(err);
 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56756bd..48833f3f3bd0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
 
 struct iwch_mr {
 	struct ib_mr ibmr;
+	struct ib_umem *umem;
 	struct iwch_dev *rhp;
 	u64 kva;
 	struct tpt_attributes attr;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 10fb8fbafa0c..f64d42b08674 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -176,6 +176,7 @@ struct ehca_mr {
 		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
 		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
 	} ib;
+	struct ib_umem *umem;
 	spinlock_t mrlock;
 
 	enum ehca_mr_flag flags;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index e14b029332c8..37e7fe0908cf 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 			       int num_phys_buf,
 			       int mr_access_flags, u64 *iova_start);
 
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
-			       struct ib_umem *region,
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
 			       int mr_access_flags, struct ib_udata *udata);
 
 int ehca_rereg_phys_mr(struct ib_mr *mr,
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab563633f..84c5bb498563 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <rdma/ib_umem.h>
+
 #include <asm/current.h>
 
 #include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
 
 /*----------------------------------------------------------------------*/
 
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
-			       struct ib_umem *region,
-			       int mr_access_flags,
-			       struct ib_udata *udata)
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
+			       int mr_access_flags, struct ib_udata *udata)
 {
 	struct ib_mr *ib_mr;
 	struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
 		ehca_gen_err("bad pd=%p", pd);
 		return ERR_PTR(-EFAULT);
 	}
-	if (!region) {
-		ehca_err(pd->device, "bad input values: region=%p", region);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
+
 	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
 	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
 	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
 		ib_mr = ERR_PTR(-EINVAL);
 		goto reg_user_mr_exit0;
 	}
-	if (region->page_size != PAGE_SIZE) {
-		ehca_err(pd->device, "page size not supported, "
-			 "region->page_size=%x", region->page_size);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
 
-	if ((region->length == 0) ||
-	    ((region->virt_base + region->length) < region->virt_base)) {
+	if (length == 0 || virt + length < virt) {
 		ehca_err(pd->device, "bad input values: length=%lx "
-			 "virt_base=%lx", region->length, region->virt_base);
+			 "virt_base=%lx", length, virt);
 		ib_mr = ERR_PTR(-EINVAL);
 		goto reg_user_mr_exit0;
 	}
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
 		goto reg_user_mr_exit0;
 	}
 
+	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+				 mr_access_flags);
+	if (IS_ERR(e_mr->umem)) {
+		ib_mr = (void *) e_mr->umem;
+		goto reg_user_mr_exit1;
+	}
+
+	if (e_mr->umem->page_size != PAGE_SIZE) {
+		ehca_err(pd->device, "page size not supported, "
+			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit2;
+	}
+
 	/* determine number of MR pages */
-	num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
-			 PAGE_SIZE - 1) / PAGE_SIZE);
-	num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
-			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+	num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
+			PAGE_SIZE);
+	num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
+			EHCA_PAGESIZE);
 
 	/* register MR on HCA */
 	pginfo.type = EHCA_MR_PGI_USER;
 	pginfo.num_pages = num_pages_mr;
 	pginfo.num_4k = num_pages_4k;
-	pginfo.region = region;
-	pginfo.next_4k = region->offset / EHCA_PAGESIZE;
+	pginfo.region = e_mr->umem;
+	pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
 	pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
-					       (&region->chunk_list),
+					       (&e_mr->umem->chunk_list),
 					       list);
 
-	ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
-			  region->length, mr_access_flags, e_pd, &pginfo,
-			  &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+	ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
+			  &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
 	if (ret) {
 		ib_mr = ERR_PTR(ret);
-		goto reg_user_mr_exit1;
+		goto reg_user_mr_exit2;
 	}
 
 	/* successful registration of all pages */
 	return &e_mr->ib.ib_mr;
 
+reg_user_mr_exit2:
+	ib_umem_release(e_mr->umem);
 reg_user_mr_exit1:
 	ehca_mr_delete(e_mr);
reg_user_mr_exit0:
 	if (IS_ERR(ib_mr))
-		ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+		ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
 			 " udata=%p",
-			 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
 	return ib_mr;
 } /* end ehca_reg_user_mr() */
 
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
 		goto dereg_mr_exit0;
 	}
 
+	if (e_mr->umem)
+		ib_umem_release(e_mr->umem);
+
 	/* successful deregistration */
 	ehca_mr_delete(e_mr);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 31e70732e369..bdeef8d4f279 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/ib_umem.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_smi.h>
 
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
 	mr->mr.offset = 0;
 	mr->mr.access_flags = acc;
 	mr->mr.max_segs = num_phys_buf;
+	mr->umem = NULL;
 
 	m = 0;
 	n = 0;
@@ -170,46 +172,56 @@ bail:
 /**
  * ipath_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
- * @region: the user memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
  * @mr_access_flags: access flags for this memory region
 * @udata: unused by the InfiniPath driver
 *
 * Returns the memory region on success, otherwise returns an errno.
 */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				u64 virt_addr, int mr_access_flags,
+				struct ib_udata *udata)
 {
 	struct ipath_mr *mr;
+	struct ib_umem *umem;
 	struct ib_umem_chunk *chunk;
 	int n, m, i;
 	struct ib_mr *ret;
 
-	if (region->length == 0) {
+	if (length == 0) {
 		ret = ERR_PTR(-EINVAL);
 		goto bail;
 	}
 
+	umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+	if (IS_ERR(umem))
+		return (void *) umem;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &umem->chunk_list, list)
 		n += chunk->nents;
 
 	mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
 	if (!mr) {
 		ret = ERR_PTR(-ENOMEM);
+		ib_umem_release(umem);
 		goto bail;
 	}
 
 	mr->mr.pd = pd;
-	mr->mr.user_base = region->user_base;
-	mr->mr.iova = region->virt_base;
-	mr->mr.length = region->length;
-	mr->mr.offset = region->offset;
+	mr->mr.user_base = start;
+	mr->mr.iova = virt_addr;
+	mr->mr.length = length;
+	mr->mr.offset = umem->offset;
 	mr->mr.access_flags = mr_access_flags;
 	mr->mr.max_segs = n;
+	mr->umem = umem;
 
 	m = 0;
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list) {
+	list_for_each_entry(chunk, &umem->chunk_list, list) {
 		for (i = 0; i < chunk->nents; i++) {
 			void *vaddr;
 
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 				goto bail;
 			}
 			mr->mr.map[m]->segs[n].vaddr = vaddr;
-			mr->mr.map[m]->segs[n].length = region->page_size;
+			mr->mr.map[m]->segs[n].length = umem->page_size;
 			n++;
 			if (n == IPATH_SEGSZ) {
 				m++;
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
 		i--;
 		kfree(mr->mr.map[i]);
 	}
+
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+
 	kfree(mr);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7064fc222727..088b837ebea8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -251,6 +251,7 @@ struct ipath_sge {
 /* Memory region */
 struct ipath_mr {
 	struct ib_mr ibmr;
+	struct ib_umem *umem;
 	struct ipath_mregion mr;	/* must be last */
 };
 
@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
 				struct ib_phys_buf *buffer_list,
 				int num_phys_buf, int acc, u64 *iova_start);
 
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				int mr_access_flags,
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				u64 virt_addr, int mr_access_flags,
 				struct ib_udata *udata);
 
 int ipath_dereg_mr(struct ib_mr *ibmr);
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
new file mode 100644
index 000000000000..b8912cdb9663
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -0,0 +1,9 @@
+config MLX4_INFINIBAND
+	tristate "Mellanox ConnectX HCA support"
+	depends on INFINIBAND
+	select MLX4_CORE
+	---help---
+	  This driver provides low-level InfiniBand support for
+	  Mellanox ConnectX PCI Express host channel adapters (HCAs).
+	  This is required to use InfiniBand protocols such as
+	  IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
new file mode 100644
index 000000000000..70f09c7826da
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
+
+mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
new file mode 100644
index 000000000000..c75ac9463e20
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -0,0 +1,100 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx4_ib.h"
34
35struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
36{
37 struct mlx4_dev *dev = to_mdev(pd->device)->dev;
38 struct mlx4_ib_ah *ah;
39
40 ah = kmalloc(sizeof *ah, GFP_ATOMIC);
41 if (!ah)
42 return ERR_PTR(-ENOMEM);
43
44 memset(&ah->av, 0, sizeof ah->av);
45
46 ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
47 ah->av.g_slid = ah_attr->src_path_bits;
48 ah->av.dlid = cpu_to_be16(ah_attr->dlid);
49 if (ah_attr->static_rate) {
50 ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
51 while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
52 !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
53 --ah->av.stat_rate;
54 }
55 ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
56 if (ah_attr->ah_flags & IB_AH_GRH) {
57 ah->av.g_slid |= 0x80;
58 ah->av.gid_index = ah_attr->grh.sgid_index;
59 ah->av.hop_limit = ah_attr->grh.hop_limit;
60 ah->av.sl_tclass_flowlabel |=
61 cpu_to_be32((ah_attr->grh.traffic_class << 20) |
62 ah_attr->grh.flow_label);
63 memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
64 }
65
66 return &ah->ibah;
67}
68
69int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
70{
71 struct mlx4_ib_ah *ah = to_mah(ibah);
72
73 memset(ah_attr, 0, sizeof *ah_attr);
74 ah_attr->dlid = be16_to_cpu(ah->av.dlid);
75 ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
76 ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
77 if (ah->av.stat_rate)
78 ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
79 ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
80
81 if (mlx4_ib_ah_grh_present(ah)) {
82 ah_attr->ah_flags = IB_AH_GRH;
83
84 ah_attr->grh.traffic_class =
85 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
86 ah_attr->grh.flow_label =
87 be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
88 ah_attr->grh.hop_limit = ah->av.hop_limit;
89 ah_attr->grh.sgid_index = ah->av.gid_index;
90 memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
91 }
92
93 return 0;
94}
95
96int mlx4_ib_destroy_ah(struct ib_ah *ah)
97{
98 kfree(to_mah(ah));
99 return 0;
100}
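
The static-rate clamp in mlx4_ib_create_ah() above is easy to misread because << binds tighter than &: the loop tests (1 << rate) & stat_rate_support and walks down one rate encoding at a time until it finds one the device advertises, stopping at the 2.5 Gb/s floor. A standalone sketch of the same loop, with made-up constants and a hypothetical support mask:

#include <stdio.h>

#define STAT_RATE_OFFSET 5	/* stands in for MLX4_STAT_RATE_OFFSET */
#define RATE_2_5_GBPS	 2	/* stands in for IB_RATE_2_5_GBPS */

static unsigned clamp_rate(unsigned requested, unsigned support_mask)
{
	unsigned rate = requested + STAT_RATE_OFFSET;

	/* note the precedence: (1 << rate) & mask, not 1 << (rate & mask) */
	while (rate > RATE_2_5_GBPS + STAT_RATE_OFFSET &&
	       !((1u << rate) & support_mask))
		--rate;		/* fall back to the next slower encoding */

	return rate;
}

int main(void)
{
	unsigned support = (1u << 7) | (1u << 8);	/* hypothetical mask */

	printf("%u\n", clamp_rate(6, support));		/* 11 -> clamps to 8 */
	return 0;
}
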
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
new file mode 100644
index 000000000000..b2a290c6703a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -0,0 +1,525 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx4/cq.h>
34#include <linux/mlx4/qp.h>
35
36#include "mlx4_ib.h"
37#include "user.h"
38
39static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
40{
41 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
42 ibcq->comp_handler(ibcq, ibcq->cq_context);
43}
44
45static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
46{
47 struct ib_event event;
48 struct ib_cq *ibcq;
49
50 if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
51 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
52 "on CQ %06x\n", type, cq->cqn);
53 return;
54 }
55
56 ibcq = &to_mibcq(cq)->ibcq;
57 if (ibcq->event_handler) {
58 event.device = ibcq->device;
59 event.event = IB_EVENT_CQ_ERR;
60 event.element.cq = ibcq;
61 ibcq->event_handler(&event, ibcq->cq_context);
62 }
63}
64
65static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
66{
67 int offset = n * sizeof (struct mlx4_cqe);
68
69 if (buf->buf.nbufs == 1)
70 return buf->buf.u.direct.buf + offset;
71 else
72 return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
73 (offset & (PAGE_SIZE - 1));
74}
75
76static void *get_cqe(struct mlx4_ib_cq *cq, int n)
77{
78 return get_cqe_from_buf(&cq->buf, n);
79}
80
81static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
82{
83 struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
84
85 return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
86 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
87}
88
89static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
90{
91 return get_sw_cqe(cq, cq->mcq.cons_index);
92}
93
94struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
95 struct ib_ucontext *context,
96 struct ib_udata *udata)
97{
98 struct mlx4_ib_dev *dev = to_mdev(ibdev);
99 struct mlx4_ib_cq *cq;
100 struct mlx4_uar *uar;
101 int buf_size;
102 int err;
103
104 if (entries < 1 || entries > dev->dev->caps.max_cqes)
105 return ERR_PTR(-EINVAL);
106
107 cq = kmalloc(sizeof *cq, GFP_KERNEL);
108 if (!cq)
109 return ERR_PTR(-ENOMEM);
110
111 entries = roundup_pow_of_two(entries + 1);
112 cq->ibcq.cqe = entries - 1;
113 buf_size = entries * sizeof (struct mlx4_cqe);
114 spin_lock_init(&cq->lock);
115
116 if (context) {
117 struct mlx4_ib_create_cq ucmd;
118
119 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
120 err = -EFAULT;
121 goto err_cq;
122 }
123
124 cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
125 IB_ACCESS_LOCAL_WRITE);
126 if (IS_ERR(cq->umem)) {
127 err = PTR_ERR(cq->umem);
128 goto err_cq;
129 }
130
131 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
132 ilog2(cq->umem->page_size), &cq->buf.mtt);
133 if (err)
134 goto err_buf;
135
136 err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
137 if (err)
138 goto err_mtt;
139
140 err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
141 &cq->db);
142 if (err)
143 goto err_mtt;
144
145 uar = &to_mucontext(context)->uar;
146 } else {
147 err = mlx4_ib_db_alloc(dev, &cq->db, 1);
148 if (err)
149 goto err_cq;
150
151 cq->mcq.set_ci_db = cq->db.db;
152 cq->mcq.arm_db = cq->db.db + 1;
153 *cq->mcq.set_ci_db = 0;
154 *cq->mcq.arm_db = 0;
155
156 if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
157 err = -ENOMEM;
158 goto err_db;
159 }
160
161 err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
162 &cq->buf.mtt);
163 if (err)
164 goto err_buf;
165
166 err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
167 if (err)
168 goto err_mtt;
169
170 uar = &dev->priv_uar;
171 }
172
173 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
174 cq->db.dma, &cq->mcq);
175 if (err)
176 goto err_dbmap;
177
178 cq->mcq.comp = mlx4_ib_cq_comp;
179 cq->mcq.event = mlx4_ib_cq_event;
180
181 if (context)
182 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
183 err = -EFAULT;
184 goto err_dbmap;
185 }
186
187 return &cq->ibcq;
188
189err_dbmap:
190 if (context)
191 mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
192
193err_mtt:
194 mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
195
196err_buf:
197 if (context)
198 ib_umem_release(cq->umem);
199 else
200 mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
201 &cq->buf.buf);
202
203err_db:
204 if (!context)
205 mlx4_ib_db_free(dev, &cq->db);
206
207err_cq:
208 kfree(cq);
209
210 return ERR_PTR(err);
211}
212
213int mlx4_ib_destroy_cq(struct ib_cq *cq)
214{
215 struct mlx4_ib_dev *dev = to_mdev(cq->device);
216 struct mlx4_ib_cq *mcq = to_mcq(cq);
217
218 mlx4_cq_free(dev->dev, &mcq->mcq);
219 mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
220
221 if (cq->uobject) {
222 mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
223 ib_umem_release(mcq->umem);
224 } else {
225 mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
226 &mcq->buf.buf);
227 mlx4_ib_db_free(dev, &mcq->db);
228 }
229
230 kfree(mcq);
231
232 return 0;
233}
234
235static void dump_cqe(void *cqe)
236{
237 __be32 *buf = cqe;
238
239 printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
240 be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
241 be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
242 be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
243}
244
245static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
246 struct ib_wc *wc)
247{
248 if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
249 printk(KERN_DEBUG "local QP operation err "
250 "(QPN %06x, WQE index %x, vendor syndrome %02x, "
251 "opcode = %02x)\n",
252 be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
253 cqe->vendor_err_syndrome,
254 cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
255 dump_cqe(cqe);
256 }
257
258 switch (cqe->syndrome) {
259 case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
260 wc->status = IB_WC_LOC_LEN_ERR;
261 break;
262 case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
263 wc->status = IB_WC_LOC_QP_OP_ERR;
264 break;
265 case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
266 wc->status = IB_WC_LOC_PROT_ERR;
267 break;
268 case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
269 wc->status = IB_WC_WR_FLUSH_ERR;
270 break;
271 case MLX4_CQE_SYNDROME_MW_BIND_ERR:
272 wc->status = IB_WC_MW_BIND_ERR;
273 break;
274 case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
275 wc->status = IB_WC_BAD_RESP_ERR;
276 break;
277 case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
278 wc->status = IB_WC_LOC_ACCESS_ERR;
279 break;
280 case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
281 wc->status = IB_WC_REM_INV_REQ_ERR;
282 break;
283 case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
284 wc->status = IB_WC_REM_ACCESS_ERR;
285 break;
286 case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
287 wc->status = IB_WC_REM_OP_ERR;
288 break;
289 case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
290 wc->status = IB_WC_RETRY_EXC_ERR;
291 break;
292 case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
293 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
294 break;
295 case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
296 wc->status = IB_WC_REM_ABORT_ERR;
297 break;
298 default:
299 wc->status = IB_WC_GENERAL_ERR;
300 break;
301 }
302
303 wc->vendor_err = cqe->vendor_err_syndrome;
304}
305
306static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
307 struct mlx4_ib_qp **cur_qp,
308 struct ib_wc *wc)
309{
310 struct mlx4_cqe *cqe;
311 struct mlx4_qp *mqp;
312 struct mlx4_ib_wq *wq;
313 struct mlx4_ib_srq *srq;
314 int is_send;
315 int is_error;
316 u16 wqe_ctr;
317
318 cqe = next_cqe_sw(cq);
319 if (!cqe)
320 return -EAGAIN;
321
322 ++cq->mcq.cons_index;
323
324 /*
325 * Make sure we read CQ entry contents after we've checked the
326 * ownership bit.
327 */
328 rmb();
329
330 is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
331 is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
332 MLX4_CQE_OPCODE_ERROR;
333
334 if (!*cur_qp ||
335 (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
336 /*
337 * We do not have to take the QP table lock here,
338 * because CQs will be locked while QPs are removed
339 * from the table.
340 */
341 mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
342 be32_to_cpu(cqe->my_qpn));
343 if (unlikely(!mqp)) {
344 printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
345 cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
346 return -EINVAL;
347 }
348
349 *cur_qp = to_mibqp(mqp);
350 }
351
352 wc->qp = &(*cur_qp)->ibqp;
353
354 if (is_send) {
355 wq = &(*cur_qp)->sq;
356 wqe_ctr = be16_to_cpu(cqe->wqe_index);
357 wq->tail += wqe_ctr - (u16) wq->tail;
358 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
359 ++wq->tail;
360 } else if ((*cur_qp)->ibqp.srq) {
361 srq = to_msrq((*cur_qp)->ibqp.srq);
362 wqe_ctr = be16_to_cpu(cqe->wqe_index);
363 wc->wr_id = srq->wrid[wqe_ctr];
364 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
365 } else {
366 wq = &(*cur_qp)->rq;
367 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
368 ++wq->tail;
369 }
370
371 if (unlikely(is_error)) {
372 mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
373 return 0;
374 }
375
376 wc->status = IB_WC_SUCCESS;
377
378 if (is_send) {
379 wc->wc_flags = 0;
380 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
381 case MLX4_OPCODE_RDMA_WRITE_IMM:
382 wc->wc_flags |= IB_WC_WITH_IMM;
383 case MLX4_OPCODE_RDMA_WRITE:
384 wc->opcode = IB_WC_RDMA_WRITE;
385 break;
386 case MLX4_OPCODE_SEND_IMM:
387 wc->wc_flags |= IB_WC_WITH_IMM;
388 case MLX4_OPCODE_SEND:
389 wc->opcode = IB_WC_SEND;
390 break;
391 case MLX4_OPCODE_RDMA_READ:
392 wc->opcode = IB_WC_RDMA_READ;
393 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
394 break;
395 case MLX4_OPCODE_ATOMIC_CS:
396 wc->opcode = IB_WC_COMP_SWAP;
397 wc->byte_len = 8;
398 break;
399 case MLX4_OPCODE_ATOMIC_FA:
400 wc->opcode = IB_WC_FETCH_ADD;
401 wc->byte_len = 8;
402 break;
403 case MLX4_OPCODE_BIND_MW:
404 wc->opcode = IB_WC_BIND_MW;
405 break;
406 }
407 } else {
408 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
409
410 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
411 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
412 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
413 wc->wc_flags = IB_WC_WITH_IMM;
414 wc->imm_data = cqe->immed_rss_invalid;
415 break;
416 case MLX4_RECV_OPCODE_SEND:
417 wc->opcode = IB_WC_RECV;
418 wc->wc_flags = 0;
419 break;
420 case MLX4_RECV_OPCODE_SEND_IMM:
421 wc->opcode = IB_WC_RECV;
422 wc->wc_flags = IB_WC_WITH_IMM;
423 wc->imm_data = cqe->immed_rss_invalid;
424 break;
425 }
426
427 wc->slid = be16_to_cpu(cqe->rlid);
428 wc->sl = cqe->sl >> 4;
429 wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
430 wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
431 wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
432 IB_WC_GRH : 0;
433 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16;
434 }
435
436 return 0;
437}
438
439int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
440{
441 struct mlx4_ib_cq *cq = to_mcq(ibcq);
442 struct mlx4_ib_qp *cur_qp = NULL;
443 unsigned long flags;
444 int npolled;
445 int err = 0;
446
447 spin_lock_irqsave(&cq->lock, flags);
448
449 for (npolled = 0; npolled < num_entries; ++npolled) {
450 err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
451 if (err)
452 break;
453 }
454
455 if (npolled)
456 mlx4_cq_set_ci(&cq->mcq);
457
458 spin_unlock_irqrestore(&cq->lock, flags);
459
460 if (err == 0 || err == -EAGAIN)
461 return npolled;
462 else
463 return err;
464}
465
466int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
467{
468 mlx4_cq_arm(&to_mcq(ibcq)->mcq,
469 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
470 MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
471 to_mdev(ibcq->device)->uar_map,
472 MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
473
474 return 0;
475}
476
477void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
478{
479 u32 prod_index;
480 int nfreed = 0;
481 struct mlx4_cqe *cqe;
482
483 /*
484 * First we need to find the current producer index, so we
485 * know where to start cleaning from. It doesn't matter if HW
486 * adds new entries after this loop -- the QP we're worried
487 * about is already in RESET, so the new entries won't come
488 * from our QP and therefore don't need to be checked.
489 */
490 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
491 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
492 break;
493
494 /*
495 * Now sweep backwards through the CQ, removing CQ entries
496 * that match our QP by copying older entries on top of them.
497 */
498 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
499 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
500 if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
501 if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
502 mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
503 ++nfreed;
504 } else if (nfreed)
505 memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
506 cqe, sizeof *cqe);
507 }
508
509 if (nfreed) {
510 cq->mcq.cons_index += nfreed;
511 /*
512 * Make sure update of buffer contents is done before
513 * updating consumer index.
514 */
515 wmb();
516 mlx4_cq_set_ci(&cq->mcq);
517 }
518}
519
520void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
521{
522 spin_lock_irq(&cq->lock);
523 __mlx4_ib_cq_clean(cq, qpn, srq);
524 spin_unlock_irq(&cq->lock);
525}
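
The trickiest line in this file is the ownership test in get_sw_cqe(): the CQ holds ibcq.cqe + 1 entries (a power of two), the consumer index runs freely, and n & (ibcq.cqe + 1) yields a lap-parity bit that hardware mirrors by toggling MLX4_CQE_OWNER_MASK each time it wraps the ring, so a CQE belongs to software exactly when the two agree. A minimal sketch of that predicate, assuming only the relationships visible above:

#include <stdbool.h>

#define OWNER_MASK 0x80		/* stands in for MLX4_CQE_OWNER_MASK */

/*
 * 'size' is ibcq.cqe + 1, a power of two; 'cons_index' is the
 * free-running consumer index, so cons_index & size is the lap parity.
 */
static bool cqe_is_sw_owned(unsigned char owner_sr_opcode,
			    unsigned cons_index, unsigned size)
{
	bool hw_bit = owner_sr_opcode & OWNER_MASK;
	bool parity = cons_index & size;

	/* same condition as the XOR in get_sw_cqe(), inverted for clarity */
	return hw_bit == parity;
}
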
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
new file mode 100644
index 000000000000..1c36087aef14
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -0,0 +1,216 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/slab.h>
34
35#include "mlx4_ib.h"
36
37struct mlx4_ib_db_pgdir {
38 struct list_head list;
39 DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
40 DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
41 unsigned long *bits[2];
42 __be32 *db_page;
43 dma_addr_t db_dma;
44};
45
46static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
47{
48 struct mlx4_ib_db_pgdir *pgdir;
49
50 pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
51 if (!pgdir)
52 return NULL;
53
54 bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2);
55 pgdir->bits[0] = pgdir->order0;
56 pgdir->bits[1] = pgdir->order1;
57 pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device,
58 PAGE_SIZE, &pgdir->db_dma,
59 GFP_KERNEL);
60 if (!pgdir->db_page) {
61 kfree(pgdir);
62 return NULL;
63 }
64
65 return pgdir;
66}
67
68static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
69 struct mlx4_ib_db *db, int order)
70{
71 int o;
72 int i;
73
74 for (o = order; o <= 1; ++o) {
75 i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o);
76 if (i < MLX4_IB_DB_PER_PAGE >> o)
77 goto found;
78 }
79
80 return -ENOMEM;
81
82found:
83 clear_bit(i, pgdir->bits[o]);
84
85 i <<= o;
86
87 if (o > order)
88 set_bit(i ^ 1, pgdir->bits[order]);
89
90 db->u.pgdir = pgdir;
91 db->index = i;
92 db->db = pgdir->db_page + db->index;
93 db->dma = pgdir->db_dma + db->index * 4;
94 db->order = order;
95
96 return 0;
97}
98
99int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
100{
101 struct mlx4_ib_db_pgdir *pgdir;
102 int ret = 0;
103
104 mutex_lock(&dev->pgdir_mutex);
105
106 list_for_each_entry(pgdir, &dev->pgdir_list, list)
107 if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
108 goto out;
109
110 pgdir = mlx4_ib_alloc_db_pgdir(dev);
111 if (!pgdir) {
112 ret = -ENOMEM;
113 goto out;
114 }
115
116 list_add(&pgdir->list, &dev->pgdir_list);
117
118 /* This should never fail -- we just allocated an empty page: */
119 WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
120
121out:
122 mutex_unlock(&dev->pgdir_mutex);
123
124 return ret;
125}
126
127void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
128{
129 int o;
130 int i;
131
132 mutex_lock(&dev->pgdir_mutex);
133
134 o = db->order;
135 i = db->index;
136
137 if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
138 clear_bit(i ^ 1, db->u.pgdir->order0);
139 ++o;
140 }
141
142 i >>= o;
143 set_bit(i, db->u.pgdir->bits[o]);
144
145 if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
146 dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
147 db->u.pgdir->db_page, db->u.pgdir->db_dma);
148 list_del(&db->u.pgdir->list);
149 kfree(db->u.pgdir);
150 }
151
152 mutex_unlock(&dev->pgdir_mutex);
153}
154
155struct mlx4_ib_user_db_page {
156 struct list_head list;
157 struct ib_umem *umem;
158 unsigned long user_virt;
159 int refcnt;
160};
161
162int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
163 struct mlx4_ib_db *db)
164{
165 struct mlx4_ib_user_db_page *page;
166 struct ib_umem_chunk *chunk;
167 int err = 0;
168
169 mutex_lock(&context->db_page_mutex);
170
171 list_for_each_entry(page, &context->db_page_list, list)
172 if (page->user_virt == (virt & PAGE_MASK))
173 goto found;
174
175 page = kmalloc(sizeof *page, GFP_KERNEL);
176 if (!page) {
177 err = -ENOMEM;
178 goto out;
179 }
180
181 page->user_virt = (virt & PAGE_MASK);
182 page->refcnt = 0;
183 page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
184 PAGE_SIZE, 0);
185 if (IS_ERR(page->umem)) {
186 err = PTR_ERR(page->umem);
187 kfree(page);
188 goto out;
189 }
190
191 list_add(&page->list, &context->db_page_list);
192
193found:
194 chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
195 db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
196 db->u.user_page = page;
197 ++page->refcnt;
198
199out:
200 mutex_unlock(&context->db_page_mutex);
201
202 return err;
203}
204
205void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
206{
207 mutex_lock(&context->db_page_mutex);
208
209 if (!--db->u.user_page->refcnt) {
210 list_del(&db->u.user_page->list);
211 ib_umem_release(db->u.user_page->umem);
212 kfree(db->u.user_page);
213 }
214
215 mutex_unlock(&context->db_page_mutex);
216}
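
mlx4_ib_alloc_db_from_pgdir() and mlx4_ib_db_free() above implement a two-level buddy scheme: the order-1 bitmap tracks free aligned pairs of doorbell dwords and starts full, while order-0 tracks single dwords left over when a pair is split (the buddy of index i is i ^ 1); freeing coalesces a dword back into its pair when the buddy is already free. A toy model of the same bookkeeping, using byte arrays instead of bitmaps and an illustrative page size:

#include <stdio.h>
#include <string.h>

#define DB_PER_PAGE 16			/* illustrative; driver uses PAGE_SIZE / 4 */

struct pgdir {
	unsigned char order0[DB_PER_PAGE];	/* 1 = free single dword */
	unsigned char order1[DB_PER_PAGE / 2];	/* 1 = free aligned pair */
};

static int db_alloc(struct pgdir *p, int order)
{
	int o, i;

	for (o = order; o <= 1; ++o) {
		unsigned char *bits = o ? p->order1 : p->order0;

		for (i = 0; i < DB_PER_PAGE >> o; ++i)
			if (bits[i])
				goto found;
	}
	return -1;				/* page exhausted */

found:
	(o ? p->order1 : p->order0)[i] = 0;
	i <<= o;				/* index in dwords */
	if (o > order)
		p->order0[i ^ 1] = 1;		/* buddy of a split pair */
	return i;
}

static void db_free(struct pgdir *p, int i, int order)
{
	int o = order;

	if (o == 0 && p->order0[i ^ 1]) {	/* coalesce with free buddy */
		p->order0[i ^ 1] = 0;
		++o;
	}
	(o ? p->order1 : p->order0)[i >> o] = 1;
}

int main(void)
{
	struct pgdir p;

	memset(&p, 0, sizeof p);
	memset(p.order1, 1, sizeof p.order1);	/* start with all pairs free */

	/* first alloc splits a pair; second reuses the leftover buddy */
	printf("%d %d\n", db_alloc(&p, 0), db_alloc(&p, 0));	/* 0 1 */
	return 0;
}
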
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
new file mode 100644
index 000000000000..333091787c5f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -0,0 +1,339 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h>
35
36#include <linux/mlx4/cmd.h>
37
38#include "mlx4_ib.h"
39
40enum {
41 MLX4_IB_VENDOR_CLASS1 = 0x9,
42 MLX4_IB_VENDOR_CLASS2 = 0xa
43};
44
45int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
46 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
47 void *in_mad, void *response_mad)
48{
49 struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
50 void *inbox;
51 int err;
52 u32 in_modifier = port;
53 u8 op_modifier = 0;
54
55 inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
56 if (IS_ERR(inmailbox))
57 return PTR_ERR(inmailbox);
58 inbox = inmailbox->buf;
59
60 outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
61 if (IS_ERR(outmailbox)) {
62 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
63 return PTR_ERR(outmailbox);
64 }
65
66 memcpy(inbox, in_mad, 256);
67
68 /*
69 * Key check traps can't be generated unless we have in_wc to
70 * tell us where to send the trap.
71 */
72 if (ignore_mkey || !in_wc)
73 op_modifier |= 0x1;
74 if (ignore_bkey || !in_wc)
75 op_modifier |= 0x2;
76
77 if (in_wc) {
78 struct {
79 __be32 my_qpn;
80 u32 reserved1;
81 __be32 rqpn;
82 u8 sl;
83 u8 g_path;
84 u16 reserved2[2];
85 __be16 pkey;
86 u32 reserved3[11];
87 u8 grh[40];
88 } *ext_info;
89
90 memset(inbox + 256, 0, 256);
91 ext_info = inbox + 256;
92
93 ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
94 ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
95 ext_info->sl = in_wc->sl << 4;
96 ext_info->g_path = in_wc->dlid_path_bits |
97 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
98 ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
99
100 if (in_grh)
101 memcpy(ext_info->grh, in_grh, 40);
102
103 op_modifier |= 0x4;
104
105 in_modifier |= in_wc->slid << 16;
106 }
107
108 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
109 in_modifier, op_modifier,
110 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
111
112 if (!err)
113 memcpy(response_mad, outmailbox->buf, 256);
114
115 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
116 mlx4_free_cmd_mailbox(dev->dev, outmailbox);
117
118 return err;
119}
120
121static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
122{
123 struct ib_ah *new_ah;
124 struct ib_ah_attr ah_attr;
125
126 if (!dev->send_agent[port_num - 1][0])
127 return;
128
129 memset(&ah_attr, 0, sizeof ah_attr);
130 ah_attr.dlid = lid;
131 ah_attr.sl = sl;
132 ah_attr.port_num = port_num;
133
134 new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
135 &ah_attr);
136 if (IS_ERR(new_ah))
137 return;
138
139 spin_lock(&dev->sm_lock);
140 if (dev->sm_ah[port_num - 1])
141 ib_destroy_ah(dev->sm_ah[port_num - 1]);
142 dev->sm_ah[port_num - 1] = new_ah;
143 spin_unlock(&dev->sm_lock);
144}
145
146/*
147 * Snoop SM MADs for port info and P_Key table sets, so we can
148 * synthesize LID change and P_Key change events.
149 */
150static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
151{
152 struct ib_event event;
153
154 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
155 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
156 mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
157 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
158 struct ib_port_info *pinfo =
159 (struct ib_port_info *) ((struct ib_smp *) mad)->data;
160
161 update_sm_ah(to_mdev(ibdev), port_num,
162 be16_to_cpu(pinfo->sm_lid),
163 pinfo->neighbormtu_mastersmsl & 0xf);
164
165 event.device = ibdev;
166 event.element.port_num = port_num;
167
168 if (pinfo->clientrereg_resv_subnetto & 0x80)
169 event.event = IB_EVENT_CLIENT_REREGISTER;
170 else
171 event.event = IB_EVENT_LID_CHANGE;
172
173 ib_dispatch_event(&event);
174 }
175
176 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
177 event.device = ibdev;
178 event.event = IB_EVENT_PKEY_CHANGE;
179 event.element.port_num = port_num;
180 ib_dispatch_event(&event);
181 }
182 }
183}
184
185static void node_desc_override(struct ib_device *dev,
186 struct ib_mad *mad)
187{
188 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
189 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
190 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
191 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
192 spin_lock(&to_mdev(dev)->sm_lock);
193 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
194 spin_unlock(&to_mdev(dev)->sm_lock);
195 }
196}
197
198static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
199{
200 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
201 struct ib_mad_send_buf *send_buf;
202 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
203 int ret;
204
205 if (agent) {
206 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
207 IB_MGMT_MAD_DATA, GFP_ATOMIC);
208 /*
209 * We rely here on the fact that MLX QPs don't use the
210 * address handle after the send is posted (this is
211 * wrong following the IB spec strictly, but we know
212 * it's OK for our devices).
213 */
214 spin_lock(&dev->sm_lock);
215 memcpy(send_buf->mad, mad, sizeof *mad);
216 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
217 ret = ib_post_send_mad(send_buf, NULL);
218 else
219 ret = -EINVAL;
220 spin_unlock(&dev->sm_lock);
221
222 if (ret)
223 ib_free_send_mad(send_buf);
224 }
225}
226
227int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
228 struct ib_wc *in_wc, struct ib_grh *in_grh,
229 struct ib_mad *in_mad, struct ib_mad *out_mad)
230{
231 u16 slid;
232 int err;
233
234 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
235
236 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
237 forward_trap(to_mdev(ibdev), port_num, in_mad);
238 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
239 }
240
241 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
242 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
243 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
244 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
245 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
246 return IB_MAD_RESULT_SUCCESS;
247
248 /*
249 * Don't process SMInfo queries or vendor-specific
250 * MADs -- the SMA can't handle them.
251 */
252 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
253 ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
254 IB_SMP_ATTR_VENDOR_MASK))
255 return IB_MAD_RESULT_SUCCESS;
256 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
257 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
258 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) {
259 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
260 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
261 return IB_MAD_RESULT_SUCCESS;
262 } else
263 return IB_MAD_RESULT_SUCCESS;
264
265 err = mlx4_MAD_IFC(to_mdev(ibdev),
266 mad_flags & IB_MAD_IGNORE_MKEY,
267 mad_flags & IB_MAD_IGNORE_BKEY,
268 port_num, in_wc, in_grh, in_mad, out_mad);
269 if (err)
270 return IB_MAD_RESULT_FAILURE;
271
272 if (!out_mad->mad_hdr.status) {
273 smp_snoop(ibdev, port_num, in_mad);
274 node_desc_override(ibdev, out_mad);
275 }
276
277 /* set return bit in status of directed route responses */
278 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
279 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
280
281 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
282 /* no response for trap repress */
283 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
284
285 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
286}
287
288static void send_handler(struct ib_mad_agent *agent,
289 struct ib_mad_send_wc *mad_send_wc)
290{
291 ib_free_send_mad(mad_send_wc->send_buf);
292}
293
294int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
295{
296 struct ib_mad_agent *agent;
297 int p, q;
298 int ret;
299
300 for (p = 0; p < dev->dev->caps.num_ports; ++p)
301 for (q = 0; q <= 1; ++q) {
302 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
303 q ? IB_QPT_GSI : IB_QPT_SMI,
304 NULL, 0, send_handler,
305 NULL, NULL);
306 if (IS_ERR(agent)) {
307 ret = PTR_ERR(agent);
308 goto err;
309 }
310 dev->send_agent[p][q] = agent;
311 }
312
313 return 0;
314
315err:
316 for (p = 0; p < dev->dev->caps.num_ports; ++p)
317 for (q = 0; q <= 1; ++q)
318 if (dev->send_agent[p][q])
319 ib_unregister_mad_agent(dev->send_agent[p][q]);
320
321 return ret;
322}
323
324void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
325{
326 struct ib_mad_agent *agent;
327 int p, q;
328
329 for (p = 0; p < dev->dev->caps.num_ports; ++p) {
330 for (q = 0; q <= 1; ++q) {
331 agent = dev->send_agent[p][q];
332 dev->send_agent[p][q] = NULL;
333 ib_unregister_mad_agent(agent);
334 }
335
336 if (dev->sm_ah[p])
337 ib_destroy_ah(dev->sm_ah[p]);
338 }
339}
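
mlx4_MAD_IFC() above packs three facts into op_modifier: bit 0 suppresses the M_Key check, bit 1 the B_Key check, and bit 2 tells firmware that the mailbox carries extended work-completion info; the key checks are also forced off whenever no in_wc exists, since a key-violation trap would have nowhere to go. A hedged restatement of just that bit logic (the enum names are illustrative, not from the driver):

#include <stdbool.h>

/* illustrative names for the literal op_modifier bits set above */
enum {
	OPMOD_IGNORE_MKEY = 0x1,	/* key-check trap suppressed */
	OPMOD_IGNORE_BKEY = 0x2,
	OPMOD_WC_PRESENT  = 0x4,	/* extended WC info in mailbox */
};

static unsigned char mad_ifc_op_modifier(bool ignore_mkey, bool ignore_bkey,
					 bool have_wc)
{
	unsigned char op = 0;

	/* without a WC there is nowhere to send a key-violation trap */
	if (ignore_mkey || !have_wc)
		op |= OPMOD_IGNORE_MKEY;
	if (ignore_bkey || !have_wc)
		op |= OPMOD_IGNORE_BKEY;
	if (have_wc)
		op |= OPMOD_WC_PRESENT;

	return op;
}
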
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
new file mode 100644
index 000000000000..688ecb4c39f3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -0,0 +1,651 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/init.h>
35#include <linux/errno.h>
36
37#include <rdma/ib_smi.h>
38#include <rdma/ib_user_verbs.h>
39
40#include <linux/mlx4/driver.h>
41#include <linux/mlx4/cmd.h>
42
43#include "mlx4_ib.h"
44#include "user.h"
45
46#define DRV_NAME "mlx4_ib"
47#define DRV_VERSION "0.01"
48#define DRV_RELDATE "May 1, 2006"
49
50MODULE_AUTHOR("Roland Dreier");
51MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
52MODULE_LICENSE("Dual BSD/GPL");
53MODULE_VERSION(DRV_VERSION);
54
55static const char mlx4_ib_version[] __devinitdata =
56 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
57 DRV_VERSION " (" DRV_RELDATE ")\n";
58
59static void init_query_mad(struct ib_smp *mad)
60{
61 mad->base_version = 1;
62 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
63 mad->class_version = 1;
64 mad->method = IB_MGMT_METHOD_GET;
65}
66
67static int mlx4_ib_query_device(struct ib_device *ibdev,
68 struct ib_device_attr *props)
69{
70 struct mlx4_ib_dev *dev = to_mdev(ibdev);
71 struct ib_smp *in_mad = NULL;
72 struct ib_smp *out_mad = NULL;
73 int err = -ENOMEM;
74
75 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
76 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
77 if (!in_mad || !out_mad)
78 goto out;
79
80 init_query_mad(in_mad);
81 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
82
83 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
84 if (err)
85 goto out;
86
87 memset(props, 0, sizeof *props);
88
89 props->fw_ver = dev->dev->caps.fw_ver;
90 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
91 IB_DEVICE_PORT_ACTIVE_EVENT |
92 IB_DEVICE_SYS_IMAGE_GUID |
93 IB_DEVICE_RC_RNR_NAK_GEN;
94 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
95 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
96 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
97 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
98 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
99 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
100 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
101 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
102
103 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
104 0xffffff;
105 props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
106 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
107 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
108
109 props->max_mr_size = ~0ull;
110 props->page_size_cap = dev->dev->caps.page_size_cap;
111 props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
112 props->max_qp_wr = dev->dev->caps.max_wqes;
113 props->max_sge = min(dev->dev->caps.max_sq_sg,
114 dev->dev->caps.max_rq_sg);
115 props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
116 props->max_cqe = dev->dev->caps.max_cqes;
117 props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
118 props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
119 props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
120 props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
121 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
122 props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
123 props->max_srq_wr = dev->dev->caps.max_srq_wqes;
124 props->max_srq_sge = dev->dev->caps.max_srq_sge;
125 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
126 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
127 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
128 props->max_pkeys = dev->dev->caps.pkey_table_len;
129 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
130 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
131 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
132 props->max_mcast_grp;
133 props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
134
135out:
136 kfree(in_mad);
137 kfree(out_mad);
138
139 return err;
140}
141
142static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
143 struct ib_port_attr *props)
144{
145 struct ib_smp *in_mad = NULL;
146 struct ib_smp *out_mad = NULL;
147 int err = -ENOMEM;
148
149 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
150 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
151 if (!in_mad || !out_mad)
152 goto out;
153
154 memset(props, 0, sizeof *props);
155
156 init_query_mad(in_mad);
157 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
158 in_mad->attr_mod = cpu_to_be32(port);
159
160 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
161 if (err)
162 goto out;
163
164 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
165 props->lmc = out_mad->data[34] & 0x7;
166 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
167 props->sm_sl = out_mad->data[36] & 0xf;
168 props->state = out_mad->data[32] & 0xf;
169 props->phys_state = out_mad->data[33] >> 4;
170 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
171 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len;
172 props->max_msg_sz = 0x80000000;
173 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len;
174 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
175 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
176 props->active_width = out_mad->data[31] & 0xf;
177 props->active_speed = out_mad->data[35] >> 4;
178 props->max_mtu = out_mad->data[41] & 0xf;
179 props->active_mtu = out_mad->data[36] >> 4;
180 props->subnet_timeout = out_mad->data[51] & 0x1f;
181 props->max_vl_num = out_mad->data[37] >> 4;
182 props->init_type_reply = out_mad->data[41] >> 4;
183
184out:
185 kfree(in_mad);
186 kfree(out_mad);
187
188 return err;
189}
190
191static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
192 union ib_gid *gid)
193{
194 struct ib_smp *in_mad = NULL;
195 struct ib_smp *out_mad = NULL;
196 int err = -ENOMEM;
197
198 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
199 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
200 if (!in_mad || !out_mad)
201 goto out;
202
203 init_query_mad(in_mad);
204 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
205 in_mad->attr_mod = cpu_to_be32(port);
206
207 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
208 if (err)
209 goto out;
210
211 memcpy(gid->raw, out_mad->data + 8, 8);
212
213 init_query_mad(in_mad);
214 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
215 in_mad->attr_mod = cpu_to_be32(index / 8);
216
217 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
218 if (err)
219 goto out;
220
221 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
222
223out:
224 kfree(in_mad);
225 kfree(out_mad);
226 return err;
227}
228
229static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
230 u16 *pkey)
231{
232 struct ib_smp *in_mad = NULL;
233 struct ib_smp *out_mad = NULL;
234 int err = -ENOMEM;
235
236 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
237 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
238 if (!in_mad || !out_mad)
239 goto out;
240
241 init_query_mad(in_mad);
242 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
243 in_mad->attr_mod = cpu_to_be32(index / 32);
244
245 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
246 if (err)
247 goto out;
248
249 *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
250
251out:
252 kfree(in_mad);
253 kfree(out_mad);
254 return err;
255}
256
257static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
258 struct ib_device_modify *props)
259{
260 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
261 return -EOPNOTSUPP;
262
263 if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
264 spin_lock(&to_mdev(ibdev)->sm_lock);
265 memcpy(ibdev->node_desc, props->node_desc, 64);
266 spin_unlock(&to_mdev(ibdev)->sm_lock);
267 }
268
269 return 0;
270}
271
272static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
273 u32 cap_mask)
274{
275 struct mlx4_cmd_mailbox *mailbox;
276 int err;
277
278 mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
279 if (IS_ERR(mailbox))
280 return PTR_ERR(mailbox);
281
282 memset(mailbox->buf, 0, 256);
283 *(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
284 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
285
286 err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
287 MLX4_CMD_TIME_CLASS_B);
288
289 mlx4_free_cmd_mailbox(dev->dev, mailbox);
290 return err;
291}
292
293static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
294 struct ib_port_modify *props)
295{
296 struct ib_port_attr attr;
297 u32 cap_mask;
298 int err;
299
300 mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
301
302 err = mlx4_ib_query_port(ibdev, port, &attr);
303 if (err)
304 goto out;
305
306 cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
307 ~props->clr_port_cap_mask;
308
309 err = mlx4_SET_PORT(to_mdev(ibdev), port,
310 !!(mask & IB_PORT_RESET_QKEY_CNTR),
311 cap_mask);
312
313out:
314 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
315 return err;
316}
317
318static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
319 struct ib_udata *udata)
320{
321 struct mlx4_ib_dev *dev = to_mdev(ibdev);
322 struct mlx4_ib_ucontext *context;
323 struct mlx4_ib_alloc_ucontext_resp resp;
324 int err;
325
326 resp.qp_tab_size = dev->dev->caps.num_qps;
327 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
328 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
329
330 context = kmalloc(sizeof *context, GFP_KERNEL);
331 if (!context)
332 return ERR_PTR(-ENOMEM);
333
334 err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
335 if (err) {
336 kfree(context);
337 return ERR_PTR(err);
338 }
339
340 INIT_LIST_HEAD(&context->db_page_list);
341 mutex_init(&context->db_page_mutex);
342
343 err = ib_copy_to_udata(udata, &resp, sizeof resp);
344 if (err) {
345 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
346 kfree(context);
347 return ERR_PTR(-EFAULT);
348 }
349
350 return &context->ibucontext;
351}
352
353static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
354{
355 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
356
357 mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
358 kfree(context);
359
360 return 0;
361}
362
363static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
364{
365 struct mlx4_ib_dev *dev = to_mdev(context->device);
366
367 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
368 return -EINVAL;
369
370 if (vma->vm_pgoff == 0) {
371 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
372
373 if (io_remap_pfn_range(vma, vma->vm_start,
374 to_mucontext(context)->uar.pfn,
375 PAGE_SIZE, vma->vm_page_prot))
376 return -EAGAIN;
377 } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
378 /* FIXME want pgprot_writecombine() for BlueFlame pages */
379 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
380
381 if (io_remap_pfn_range(vma, vma->vm_start,
382 to_mucontext(context)->uar.pfn +
383 dev->dev->caps.num_uars,
384 PAGE_SIZE, vma->vm_page_prot))
385 return -EAGAIN;
386 } else
387 return -EINVAL;
388
389 return 0;
390}
391
392static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
393 struct ib_ucontext *context,
394 struct ib_udata *udata)
395{
396 struct mlx4_ib_pd *pd;
397 int err;
398
399 pd = kmalloc(sizeof *pd, GFP_KERNEL);
400 if (!pd)
401 return ERR_PTR(-ENOMEM);
402
403 err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
404 if (err) {
405 kfree(pd);
406 return ERR_PTR(err);
407 }
408
409 if (context)
410 if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
411 mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
412 kfree(pd);
413 return ERR_PTR(-EFAULT);
414 }
415
416 return &pd->ibpd;
417}
418
419static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
420{
421 mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
422 kfree(pd);
423
424 return 0;
425}
426
427static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
428{
429 return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
430 &to_mqp(ibqp)->mqp, gid->raw);
431}
432
433static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
434{
435 return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
436 &to_mqp(ibqp)->mqp, gid->raw);
437}
438
439static int init_node_data(struct mlx4_ib_dev *dev)
440{
441 struct ib_smp *in_mad = NULL;
442 struct ib_smp *out_mad = NULL;
443 int err = -ENOMEM;
444
445 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
446 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
447 if (!in_mad || !out_mad)
448 goto out;
449
450 init_query_mad(in_mad);
451 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
452
453 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
454 if (err)
455 goto out;
456
457 memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
458
459 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
460
461 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
462 if (err)
463 goto out;
464
465 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
466
467out:
468 kfree(in_mad);
469 kfree(out_mad);
470 return err;
471}
472
473static void *mlx4_ib_add(struct mlx4_dev *dev)
474{
475 struct mlx4_ib_dev *ibdev;
476
477 ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
478 if (!ibdev) {
479 dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
480 return NULL;
481 }
482
483 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
484 goto err_dealloc;
485
486 if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
487 goto err_pd;
488
489 ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
490 if (!ibdev->uar_map)
491 goto err_uar;
492
493 INIT_LIST_HEAD(&ibdev->pgdir_list);
494 mutex_init(&ibdev->pgdir_mutex);
495
496 ibdev->dev = dev;
497
498 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
499 ibdev->ib_dev.owner = THIS_MODULE;
500 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
501 ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
502 ibdev->ib_dev.num_comp_vectors = 1;
503 ibdev->ib_dev.dma_device = &dev->pdev->dev;
504
505 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
506 ibdev->ib_dev.uverbs_cmd_mask =
507 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
508 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
509 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
510 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
511 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
512 (1ull << IB_USER_VERBS_CMD_REG_MR) |
513 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
514 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
515 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
516 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
517 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
518 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
519 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
520 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
521 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
522 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
523 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
524 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
525
526 ibdev->ib_dev.query_device = mlx4_ib_query_device;
527 ibdev->ib_dev.query_port = mlx4_ib_query_port;
528 ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
529 ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
530 ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
531 ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
532 ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
533 ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
534 ibdev->ib_dev.mmap = mlx4_ib_mmap;
535 ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
536 ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
537 ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
538 ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
539 ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
540 ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
541 ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
542 ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
543 ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
544 ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
545 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
546 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
547 ibdev->ib_dev.post_send = mlx4_ib_post_send;
548 ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
549 ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
550 ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
551 ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
552 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
553 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
554 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
555 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
556 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
557 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
558 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
559
560 if (init_node_data(ibdev))
561 goto err_map;
562
563 spin_lock_init(&ibdev->sm_lock);
564 mutex_init(&ibdev->cap_mask_mutex);
565
566 if (ib_register_device(&ibdev->ib_dev))
567 goto err_map;
568
569 if (mlx4_ib_mad_init(ibdev))
570 goto err_reg;
571
572 return ibdev;
573
574err_reg:
575 ib_unregister_device(&ibdev->ib_dev);
576
577err_map:
578 iounmap(ibdev->uar_map);
579
580err_uar:
581 mlx4_uar_free(dev, &ibdev->priv_uar);
582
583err_pd:
584 mlx4_pd_free(dev, ibdev->priv_pdn);
585
586err_dealloc:
587 ib_dealloc_device(&ibdev->ib_dev);
588
589 return NULL;
590}
591
592static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
593{
594 struct mlx4_ib_dev *ibdev = ibdev_ptr;
595 int p;
596
597 for (p = 1; p <= dev->caps.num_ports; ++p)
598 mlx4_CLOSE_PORT(dev, p);
599
600 mlx4_ib_mad_cleanup(ibdev);
601 ib_unregister_device(&ibdev->ib_dev);
602 iounmap(ibdev->uar_map);
603 mlx4_uar_free(dev, &ibdev->priv_uar);
604 mlx4_pd_free(dev, ibdev->priv_pdn);
605 ib_dealloc_device(&ibdev->ib_dev);
606}
607
608static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
609 enum mlx4_dev_event event, int subtype,
610 int port)
611{
612 struct ib_event ibev;
613
614 switch (event) {
615 case MLX4_EVENT_TYPE_PORT_CHANGE:
616 ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
617 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
618 break;
619
620 case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR:
621 ibev.event = IB_EVENT_DEVICE_FATAL;
622 break;
623
624 default:
625 return;
626 }
627
628 ibev.device = ibdev_ptr;
629 ibev.element.port_num = port;
630
631 ib_dispatch_event(&ibev);
632}
633
634static struct mlx4_interface mlx4_ib_interface = {
635 .add = mlx4_ib_add,
636 .remove = mlx4_ib_remove,
637 .event = mlx4_ib_event
638};
639
640static int __init mlx4_ib_init(void)
641{
642 return mlx4_register_interface(&mlx4_ib_interface);
643}
644
645static void __exit mlx4_ib_cleanup(void)
646{
647 mlx4_unregister_interface(&mlx4_ib_interface);
648}
649
650module_init(mlx4_ib_init);
651module_exit(mlx4_ib_cleanup);
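
main.c plugs into mlx4_core through struct mlx4_interface: mlx4_register_interface() causes .add() to be called once per ConnectX device, .remove() on teardown, and .event() for asynchronous port and catastrophic-error events. A skeletal client under those assumptions, using only names that appear in this diff (the my_* bodies are placeholders, not a real consumer):

#include <linux/module.h>
#include <linux/mlx4/driver.h>

static void *my_add(struct mlx4_dev *dev)
{
	/* allocate per-device client state; NULL means "not interested" */
	return dev;				/* placeholder */
}

static void my_remove(struct mlx4_dev *dev, void *context)
{
	/* tear down whatever my_add() returned as 'context' */
}

static void my_event(struct mlx4_dev *dev, void *context,
		     enum mlx4_dev_event event, int subtype, int port)
{
	/* react to port changes and catastrophic errors */
}

static struct mlx4_interface my_interface = {
	.add	= my_add,
	.remove	= my_remove,
	.event	= my_event,
};

static int __init my_init(void)
{
	return mlx4_register_interface(&my_interface);
}

static void __exit my_exit(void)
{
	mlx4_unregister_interface(&my_interface);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
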
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
new file mode 100644
index 000000000000..93dac71f3230
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -0,0 +1,285 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX4_IB_H
34#define MLX4_IB_H
35
36#include <linux/compiler.h>
37#include <linux/list.h>
38
39#include <rdma/ib_verbs.h>
40#include <rdma/ib_umem.h>
41
42#include <linux/mlx4/device.h>
43#include <linux/mlx4/doorbell.h>
44
45enum {
46 MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
47};
48
49struct mlx4_ib_db_pgdir;
50struct mlx4_ib_user_db_page;
51
52struct mlx4_ib_db {
53 __be32 *db;
54 union {
55 struct mlx4_ib_db_pgdir *pgdir;
56 struct mlx4_ib_user_db_page *user_page;
57 } u;
58 dma_addr_t dma;
59 int index;
60 int order;
61};
62
63struct mlx4_ib_ucontext {
64 struct ib_ucontext ibucontext;
65 struct mlx4_uar uar;
66 struct list_head db_page_list;
67 struct mutex db_page_mutex;
68};
69
70struct mlx4_ib_pd {
71 struct ib_pd ibpd;
72 u32 pdn;
73};
74
75struct mlx4_ib_cq_buf {
76 struct mlx4_buf buf;
77 struct mlx4_mtt mtt;
78};
79
80struct mlx4_ib_cq {
81 struct ib_cq ibcq;
82 struct mlx4_cq mcq;
83 struct mlx4_ib_cq_buf buf;
84 struct mlx4_ib_db db;
85 spinlock_t lock;
86 struct ib_umem *umem;
87};
88
89struct mlx4_ib_mr {
90 struct ib_mr ibmr;
91 struct mlx4_mr mmr;
92 struct ib_umem *umem;
93};
94
95struct mlx4_ib_wq {
96 u64 *wrid;
97 spinlock_t lock;
98 int max;
99 int max_gs;
100 int offset;
101 int wqe_shift;
102 unsigned head;
103 unsigned tail;
104};
105
106struct mlx4_ib_qp {
107 struct ib_qp ibqp;
108 struct mlx4_qp mqp;
109 struct mlx4_buf buf;
110
111 struct mlx4_ib_db db;
112 struct mlx4_ib_wq rq;
113
114 u32 doorbell_qpn;
115 __be32 sq_signal_bits;
116 struct mlx4_ib_wq sq;
117
118 struct ib_umem *umem;
119 struct mlx4_mtt mtt;
120 int buf_size;
121 struct mutex mutex;
122 u8 port;
123 u8 alt_port;
124 u8 atomic_rd_en;
125 u8 resp_depth;
126 u8 state;
127};
128
129struct mlx4_ib_srq {
130 struct ib_srq ibsrq;
131 struct mlx4_srq msrq;
132 struct mlx4_buf buf;
133 struct mlx4_ib_db db;
134 u64 *wrid;
135 spinlock_t lock;
136 int head;
137 int tail;
138 u16 wqe_ctr;
139 struct ib_umem *umem;
140 struct mlx4_mtt mtt;
141 struct mutex mutex;
142};
143
144struct mlx4_ib_ah {
145 struct ib_ah ibah;
146 struct mlx4_av av;
147};
148
149struct mlx4_ib_dev {
150 struct ib_device ib_dev;
151 struct mlx4_dev *dev;
152 void __iomem *uar_map;
153
154 struct list_head pgdir_list;
155 struct mutex pgdir_mutex;
156
157 struct mlx4_uar priv_uar;
158 u32 priv_pdn;
159 MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
160
161 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
162 struct ib_ah *sm_ah[MLX4_MAX_PORTS];
163 spinlock_t sm_lock;
164
165 struct mutex cap_mask_mutex;
166};
167
168static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
169{
170 return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
171}
172
173static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
174{
175 return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext);
176}
177
178static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
179{
180 return container_of(ibpd, struct mlx4_ib_pd, ibpd);
181}
182
183static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
184{
185 return container_of(ibcq, struct mlx4_ib_cq, ibcq);
186}
187
188static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq)
189{
190 return container_of(mcq, struct mlx4_ib_cq, mcq);
191}
192
193static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
194{
195 return container_of(ibmr, struct mlx4_ib_mr, ibmr);
196}
197
198static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
199{
200 return container_of(ibqp, struct mlx4_ib_qp, ibqp);
201}
202
203static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp)
204{
205 return container_of(mqp, struct mlx4_ib_qp, mqp);
206}
207
208static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq)
209{
210 return container_of(ibsrq, struct mlx4_ib_srq, ibsrq);
211}
212
213static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq)
214{
215 return container_of(msrq, struct mlx4_ib_srq, msrq);
216}
217
218static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
219{
220 return container_of(ibah, struct mlx4_ib_ah, ibah);
221}
222
223int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
224void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
225int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
226 struct mlx4_ib_db *db);
227void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
228
229struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
230int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
231 struct ib_umem *umem);
232struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
233 u64 virt_addr, int access_flags,
234 struct ib_udata *udata);
235int mlx4_ib_dereg_mr(struct ib_mr *mr);
236
237struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
238 struct ib_ucontext *context,
239 struct ib_udata *udata);
240int mlx4_ib_destroy_cq(struct ib_cq *cq);
241int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
242int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
243void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
244void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
245
246struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
247int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
248int mlx4_ib_destroy_ah(struct ib_ah *ah);
249
250struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
251 struct ib_srq_init_attr *init_attr,
252 struct ib_udata *udata);
253int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
254 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
255int mlx4_ib_destroy_srq(struct ib_srq *srq);
256void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
257int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
258 struct ib_recv_wr **bad_wr);
259
260struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
261 struct ib_qp_init_attr *init_attr,
262 struct ib_udata *udata);
263int mlx4_ib_destroy_qp(struct ib_qp *qp);
264int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
265 int attr_mask, struct ib_udata *udata);
266int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
267 struct ib_send_wr **bad_wr);
268int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
269 struct ib_recv_wr **bad_wr);
270
271int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
272 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
273 void *in_mad, void *response_mad);
274int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
275 struct ib_wc *in_wc, struct ib_grh *in_grh,
276 struct ib_mad *in_mad, struct ib_mad *out_mad);
277int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
278void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
279
280static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
281{
282 return !!(ah->av.g_slid & 0x80);
283}
284
285#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
new file mode 100644
index 000000000000..85ae906f1d12
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx4_ib.h"
34
35static u32 convert_access(int acc)
36{
37 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
38 (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
39 (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
40 (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
41 MLX4_PERM_LOCAL_READ;
42}
43
44struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
45{
46 struct mlx4_ib_mr *mr;
47 int err;
48
49 mr = kmalloc(sizeof *mr, GFP_KERNEL);
50 if (!mr)
51 return ERR_PTR(-ENOMEM);
52
53 err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
54 ~0ull, convert_access(acc), 0, 0, &mr->mmr);
55 if (err)
56 goto err_free;
57
58 err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
59 if (err)
60 goto err_mr;
61
62 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
63 mr->umem = NULL;
64
65 return &mr->ibmr;
66
67err_mr:
68 mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
69
70err_free:
71 kfree(mr);
72
73 return ERR_PTR(err);
74}
75
76int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
77 struct ib_umem *umem)
78{
79 u64 *pages;
80 struct ib_umem_chunk *chunk;
81 int i, j, k;
82 int n;
83 int len;
84 int err = 0;
85
86 pages = (u64 *) __get_free_page(GFP_KERNEL);
87 if (!pages)
88 return -ENOMEM;
89
90 i = n = 0;
91
92 list_for_each_entry(chunk, &umem->chunk_list, list)
93 for (j = 0; j < chunk->nmap; ++j) {
94 len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
95 for (k = 0; k < len; ++k) {
96 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
97 umem->page_size * k;
98 /*
99 * Be friendly to WRITE_MTT firmware
100 * command, and pass it chunks of
101 * appropriate size.
102 */
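/*
 * A 4 KB page holds 512 u64 entries; flushing at 510
 * presumably leaves room for the two leading mailbox
 * entries that carry the starting MTT index, hence
 * the "- 2" below.
 */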
103 if (i == PAGE_SIZE / sizeof (u64) - 2) {
104 err = mlx4_write_mtt(dev->dev, mtt, n,
105 i, pages);
106 if (err)
107 goto out;
108 n += i;
109 i = 0;
110 }
111 }
112 }
113
114 if (i)
115 err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
116
117out:
118 free_page((unsigned long) pages);
119 return err;
120}
121
122struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
123 u64 virt_addr, int access_flags,
124 struct ib_udata *udata)
125{
126 struct mlx4_ib_dev *dev = to_mdev(pd->device);
127 struct mlx4_ib_mr *mr;
128 int shift;
129 int err;
130 int n;
131
132 mr = kmalloc(sizeof *mr, GFP_KERNEL);
133 if (!mr)
134 return ERR_PTR(-ENOMEM);
135
136 mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
137 if (IS_ERR(mr->umem)) {
138 err = PTR_ERR(mr->umem);
139 goto err_free;
140 }
141
142 n = ib_umem_page_count(mr->umem);
143 shift = ilog2(mr->umem->page_size);
144
145 err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
146 convert_access(access_flags), n, shift, &mr->mmr);
147 if (err)
148 goto err_umem;
149
150 err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
151 if (err)
152 goto err_mr;
153
154 err = mlx4_mr_enable(dev->dev, &mr->mmr);
155 if (err)
156 goto err_mr;
157
158 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
159
160 return &mr->ibmr;
161
162err_mr:
163 mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
164
165err_umem:
166 ib_umem_release(mr->umem);
167
168err_free:
169 kfree(mr);
170
171 return ERR_PTR(err);
172}
173
174int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
175{
176 struct mlx4_ib_mr *mr = to_mmr(ibmr);
177
178 mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
179 if (mr->umem)
180 ib_umem_release(mr->umem);
181 kfree(mr);
182
183 return 0;
184}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
new file mode 100644
index 000000000000..5cd706908450
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -0,0 +1,1294 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_cache.h>
34#include <rdma/ib_pack.h>
35
36#include <linux/mlx4/qp.h>
37
38#include "mlx4_ib.h"
39#include "user.h"
40
41enum {
42 MLX4_IB_ACK_REQ_FREQ = 8,
43};
44
45enum {
46 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
47 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
48};
49
50enum {
51 /*
52 * Largest possible UD header: send with GRH and immediate data.
53 */
54 MLX4_IB_UD_HEADER_SIZE = 72
55};
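/*
 * The 72 bytes above break down as LRH (8) + GRH (40) + BTH (12) +
 * DETH (8) + 4 bytes of immediate data.
 */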
56
57struct mlx4_ib_sqp {
58 struct mlx4_ib_qp qp;
59 int pkey_index;
60 u32 qkey;
61 u32 send_psn;
62 struct ib_ud_header ud_header;
63 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
64};
65
66static const __be32 mlx4_ib_opcode[] = {
67 [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
68 [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
69 [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
70 [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
71 [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
72 [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
73 [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
74};
75
76static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
77{
78 return container_of(mqp, struct mlx4_ib_sqp, qp);
79}
80
81static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
82{
83 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
84 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
85}
86
87static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
88{
89 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
90 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
91}
92
93static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
94{
95 if (qp->buf.nbufs == 1)
96 return qp->buf.u.direct.buf + offset;
97 else
98 return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
99 (offset & (PAGE_SIZE - 1));
100}
101
102static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
103{
104 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
105}
106
107static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
108{
109 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
110}
111
112static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
113{
114 struct ib_event event;
115 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
116
117 if (type == MLX4_EVENT_TYPE_PATH_MIG)
118 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
119
120 if (ibqp->event_handler) {
121 event.device = ibqp->device;
122 event.element.qp = ibqp;
123 switch (type) {
124 case MLX4_EVENT_TYPE_PATH_MIG:
125 event.event = IB_EVENT_PATH_MIG;
126 break;
127 case MLX4_EVENT_TYPE_COMM_EST:
128 event.event = IB_EVENT_COMM_EST;
129 break;
130 case MLX4_EVENT_TYPE_SQ_DRAINED:
131 event.event = IB_EVENT_SQ_DRAINED;
132 break;
133 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
134 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
135 break;
136 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
137 event.event = IB_EVENT_QP_FATAL;
138 break;
139 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
140 event.event = IB_EVENT_PATH_MIG_ERR;
141 break;
142 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
143 event.event = IB_EVENT_QP_REQ_ERR;
144 break;
145 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
146 event.event = IB_EVENT_QP_ACCESS_ERR;
147 break;
148 default:
149 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
150 "on QP %06x\n", type, qp->qpn);
151 return;
152 }
153
154 ibqp->event_handler(&event, ibqp->qp_context);
155 }
156}
157
158static int send_wqe_overhead(enum ib_qp_type type)
159{
160 /*
161 * UD WQEs must have a datagram segment.
162 * RC and UC WQEs might have a remote address segment.
163 * MLX WQEs need two extra inline data segments (for the UD
164 * header and space for the ICRC).
165 */
166 switch (type) {
167 case IB_QPT_UD:
168 return sizeof (struct mlx4_wqe_ctrl_seg) +
169 sizeof (struct mlx4_wqe_datagram_seg);
170 case IB_QPT_UC:
171 return sizeof (struct mlx4_wqe_ctrl_seg) +
172 sizeof (struct mlx4_wqe_raddr_seg);
173 case IB_QPT_RC:
174 return sizeof (struct mlx4_wqe_ctrl_seg) +
175 sizeof (struct mlx4_wqe_atomic_seg) +
176 sizeof (struct mlx4_wqe_raddr_seg);
177 case IB_QPT_SMI:
178 case IB_QPT_GSI:
179 return sizeof (struct mlx4_wqe_ctrl_seg) +
180 ALIGN(MLX4_IB_UD_HEADER_SIZE +
181 sizeof (struct mlx4_wqe_inline_seg),
182 sizeof (struct mlx4_wqe_data_seg)) +
183 ALIGN(4 +
184 sizeof (struct mlx4_wqe_inline_seg),
185 sizeof (struct mlx4_wqe_data_seg));
186 default:
187 return sizeof (struct mlx4_wqe_ctrl_seg);
188 }
189}
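/*
 * As a worked example (assuming the segment layouts in
 * <linux/mlx4/qp.h>: 16-byte ctrl, raddr and atomic segments and a
 * 48-byte datagram segment), an RC WQE carries 48 bytes of overhead
 * and a UD WQE 64 bytes before any data segments are added.
 */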
190
191static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
192 enum ib_qp_type type, struct mlx4_ib_qp *qp)
193{
194 /* Sanity check QP size before proceeding */
195 if (cap->max_send_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_wr > dev->dev->caps.max_wqes ||
197 cap->max_send_sge > dev->dev->caps.max_sq_sg ||
198 cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
199 cap->max_inline_data + send_wqe_overhead(type) +
200 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
201 return -EINVAL;
202
203 /*
204 * For MLX transport we need 2 extra S/G entries:
205 * one for the header and one for the checksum at the end
206 */
207 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
208 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
209 return -EINVAL;
210
211 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
212 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
213
214 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
215 sizeof (struct mlx4_wqe_data_seg)));
216 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
217
218 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
219 sizeof (struct mlx4_wqe_data_seg),
220 cap->max_inline_data +
221 sizeof (struct mlx4_wqe_inline_seg)) +
222 send_wqe_overhead(type)));
223 qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
224 sizeof (struct mlx4_wqe_data_seg);
225
226 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
227 (qp->sq.max << qp->sq.wqe_shift);
228 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
229 qp->rq.offset = 0;
230 qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
231 } else {
232 qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
233 qp->sq.offset = 0;
234 }
235
236 cap->max_send_wr = qp->sq.max;
237 cap->max_recv_wr = qp->rq.max;
238 cap->max_send_sge = qp->sq.max_gs;
239 cap->max_recv_sge = qp->rq.max_gs;
240 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
241 sizeof (struct mlx4_wqe_inline_seg);
242
243 return 0;
244}
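/*
 * Note that the queue with the larger WQE stride is placed first in
 * the buffer: e.g. with rq.wqe_shift == 6 and sq.wqe_shift == 7 the
 * send queue starts at offset 0 and the receive queue follows it.
 */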
245
246static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
247 struct ib_qp_init_attr *init_attr,
248 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
249{
250 struct mlx4_wqe_ctrl_seg *ctrl;
251 int err;
252 int i;
253
254 mutex_init(&qp->mutex);
255 spin_lock_init(&qp->sq.lock);
256 spin_lock_init(&qp->rq.lock);
257
258 qp->state = IB_QPS_RESET;
259 qp->atomic_rd_en = 0;
260 qp->resp_depth = 0;
261
262 qp->rq.head = 0;
263 qp->rq.tail = 0;
264 qp->sq.head = 0;
265 qp->sq.tail = 0;
266
267 err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
268 if (err)
269 goto err;
270
271 if (pd->uobject) {
272 struct mlx4_ib_create_qp ucmd;
273
274 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
275 err = -EFAULT;
276 goto err;
277 }
278
279 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
280 qp->buf_size, 0);
281 if (IS_ERR(qp->umem)) {
282 err = PTR_ERR(qp->umem);
283 goto err;
284 }
285
286 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
287 ilog2(qp->umem->page_size), &qp->mtt);
288 if (err)
289 goto err_buf;
290
291 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
292 if (err)
293 goto err_mtt;
294
295 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
296 ucmd.db_addr, &qp->db);
297 if (err)
298 goto err_mtt;
299 } else {
300 err = mlx4_ib_db_alloc(dev, &qp->db, 0);
301 if (err)
302 goto err;
303
304 *qp->db.db = 0;
305
306 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
307 err = -ENOMEM;
308 goto err_db;
309 }
310
311 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
312 &qp->mtt);
313 if (err)
314 goto err_buf;
315
316 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
317 if (err)
318 goto err_mtt;
319
320 for (i = 0; i < qp->sq.max; ++i) {
321 ctrl = get_send_wqe(qp, i);
322 ctrl->owner_opcode = cpu_to_be32(1 << 31);
323 }
324
325 qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
326 qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
327
328 if (!qp->sq.wrid || !qp->rq.wrid) {
329 err = -ENOMEM;
330 goto err_wrid;
331 }
332
333 /* We don't support inline sends for kernel QPs (yet) */
334 init_attr->cap.max_inline_data = 0;
335 }
336
337 err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
338 if (err)
339 goto err_wrid;
340
341 /*
342 * Hardware wants QPN written in big-endian order (after
343 * shifting) for send doorbell. Precompute this value to save
344 * a little bit when posting sends.
345 */
346 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
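/*
 * On a little-endian CPU the swab32() above is equivalent to
 * cpu_to_be32(): e.g. QPN 0x000123 becomes 0x00012300 after the
 * shift and is stored so that the bytes 00 01 23 00 reach the
 * doorbell register in big-endian order.
 */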
347
348 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
349 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
350 else
351 qp->sq_signal_bits = 0;
352
353 qp->mqp.event = mlx4_ib_qp_event;
354
355 return 0;
356
357err_wrid:
358 if (pd->uobject)
359 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
360 else {
361 kfree(qp->sq.wrid);
362 kfree(qp->rq.wrid);
363 }
364
365err_mtt:
366 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
367
368err_buf:
369 if (pd->uobject)
370 ib_umem_release(qp->umem);
371 else
372 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
373
374err_db:
375 if (!pd->uobject)
376 mlx4_ib_db_free(dev, &qp->db);
377
378err:
379 return err;
380}
381
382static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
383{
384 switch (state) {
385 case IB_QPS_RESET: return MLX4_QP_STATE_RST;
386 case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
387 case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
388 case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
389 case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
390 case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
391 case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
392 default: return -1;
393 }
394}
395
396static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
397{
398 if (send_cq == recv_cq)
399 spin_lock_irq(&send_cq->lock);
400 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
401 spin_lock_irq(&send_cq->lock);
402 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
403 } else {
404 spin_lock_irq(&recv_cq->lock);
405 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
406 }
407}
408
409static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
410{
411 if (send_cq == recv_cq)
412 spin_unlock_irq(&send_cq->lock);
413 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
414 spin_unlock(&recv_cq->lock);
415 spin_unlock_irq(&send_cq->lock);
416 } else {
417 spin_unlock(&send_cq->lock);
418 spin_unlock_irq(&recv_cq->lock);
419 }
420}
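/*
 * Taking the two CQ locks in a fixed order (lower CQN first) keeps a
 * QP whose send and receive CQs differ from deadlocking against
 * another QP using the same pair of CQs in the opposite roles.
 */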
421
422static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
423 int is_user)
424{
425 struct mlx4_ib_cq *send_cq, *recv_cq;
426
427 if (qp->state != IB_QPS_RESET)
428 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
429 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
430 printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
431 qp->mqp.qpn);
432
433 send_cq = to_mcq(qp->ibqp.send_cq);
434 recv_cq = to_mcq(qp->ibqp.recv_cq);
435
436 mlx4_ib_lock_cqs(send_cq, recv_cq);
437
438 if (!is_user) {
439 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
440 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
441 if (send_cq != recv_cq)
442 __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
443 }
444
445 mlx4_qp_remove(dev->dev, &qp->mqp);
446
447 mlx4_ib_unlock_cqs(send_cq, recv_cq);
448
449 mlx4_qp_free(dev->dev, &qp->mqp);
450 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
451
452 if (is_user) {
453 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
454 &qp->db);
455 ib_umem_release(qp->umem);
456 } else {
457 kfree(qp->sq.wrid);
458 kfree(qp->rq.wrid);
459 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
460 mlx4_ib_db_free(dev, &qp->db);
461 }
462}
463
464struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
465 struct ib_qp_init_attr *init_attr,
466 struct ib_udata *udata)
467{
468 struct mlx4_ib_dev *dev = to_mdev(pd->device);
469 struct mlx4_ib_sqp *sqp;
470 struct mlx4_ib_qp *qp;
471 int err;
472
473 switch (init_attr->qp_type) {
474 case IB_QPT_RC:
475 case IB_QPT_UC:
476 case IB_QPT_UD:
477 {
478 qp = kmalloc(sizeof *qp, GFP_KERNEL);
479 if (!qp)
480 return ERR_PTR(-ENOMEM);
481
482 err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
483 if (err) {
484 kfree(qp);
485 return ERR_PTR(err);
486 }
487
488 qp->ibqp.qp_num = qp->mqp.qpn;
489
490 break;
491 }
492 case IB_QPT_SMI:
493 case IB_QPT_GSI:
494 {
495 /* Userspace is not allowed to create special QPs: */
496 if (pd->uobject)
497 return ERR_PTR(-EINVAL);
498
499 sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
500 if (!sqp)
501 return ERR_PTR(-ENOMEM);
502
503 qp = &sqp->qp;
504
505 err = create_qp_common(dev, pd, init_attr, udata,
506 dev->dev->caps.sqp_start +
507 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
508 init_attr->port_num - 1,
509 qp);
510 if (err) {
511 kfree(sqp);
512 return ERR_PTR(err);
513 }
514
515 qp->port = init_attr->port_num;
516 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
517
518 break;
519 }
520 default:
521 /* Don't support raw QPs */
522 return ERR_PTR(-EINVAL);
523 }
524
525 return &qp->ibqp;
526}
527
528int mlx4_ib_destroy_qp(struct ib_qp *qp)
529{
530 struct mlx4_ib_dev *dev = to_mdev(qp->device);
531 struct mlx4_ib_qp *mqp = to_mqp(qp);
532
533 if (is_qp0(dev, mqp))
534 mlx4_CLOSE_PORT(dev->dev, mqp->port);
535
536 destroy_qp_common(dev, mqp, !!qp->pd->uobject);
537
538 if (is_sqp(dev, mqp))
539 kfree(to_msqp(mqp));
540 else
541 kfree(mqp);
542
543 return 0;
544}
545
546static void init_port(struct mlx4_ib_dev *dev, int port)
547{
548 struct mlx4_init_port_param param;
549 int err;
550
551 memset(&param, 0, sizeof param);
552
553 param.port_width_cap = dev->dev->caps.port_width_cap;
554 param.vl_cap = dev->dev->caps.vl_cap;
555 param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap);
556 param.max_gid = dev->dev->caps.gid_table_len;
557 param.max_pkey = dev->dev->caps.pkey_table_len;
558
559 err = mlx4_INIT_PORT(dev->dev, &param, port);
560 if (err)
561 printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err);
562}
563
564static int to_mlx4_st(enum ib_qp_type type)
565{
566 switch (type) {
567 case IB_QPT_RC: return MLX4_QP_ST_RC;
568 case IB_QPT_UC: return MLX4_QP_ST_UC;
569 case IB_QPT_UD: return MLX4_QP_ST_UD;
570 case IB_QPT_SMI:
571 case IB_QPT_GSI: return MLX4_QP_ST_MLX;
572 default: return -1;
573 }
574}
575
576static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
577 int attr_mask)
578{
579 u8 dest_rd_atomic;
580 u32 access_flags;
581 u32 hw_access_flags = 0;
582
583 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
584 dest_rd_atomic = attr->max_dest_rd_atomic;
585 else
586 dest_rd_atomic = qp->resp_depth;
587
588 if (attr_mask & IB_QP_ACCESS_FLAGS)
589 access_flags = attr->qp_access_flags;
590 else
591 access_flags = qp->atomic_rd_en;
592
593 if (!dest_rd_atomic)
594 access_flags &= IB_ACCESS_REMOTE_WRITE;
595
596 if (access_flags & IB_ACCESS_REMOTE_READ)
597 hw_access_flags |= MLX4_QP_BIT_RRE;
598 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
599 hw_access_flags |= MLX4_QP_BIT_RAE;
600 if (access_flags & IB_ACCESS_REMOTE_WRITE)
601 hw_access_flags |= MLX4_QP_BIT_RWE;
602
603 return cpu_to_be32(hw_access_flags);
604}
605
606static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
607 int attr_mask)
608{
609 if (attr_mask & IB_QP_PKEY_INDEX)
610 sqp->pkey_index = attr->pkey_index;
611 if (attr_mask & IB_QP_QKEY)
612 sqp->qkey = attr->qkey;
613 if (attr_mask & IB_QP_SQ_PSN)
614 sqp->send_psn = attr->sq_psn;
615}
616
617static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
618{
619 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
620}
621
622static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
623 struct mlx4_qp_path *path, u8 port)
624{
625 path->grh_mylmc = ah->src_path_bits & 0x7f;
626 path->rlid = cpu_to_be16(ah->dlid);
627 if (ah->static_rate) {
628 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
629 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
630 !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
631 --path->static_rate;
632 } else
633 path->static_rate = 0;
634 path->counter_index = 0xff;
635
636 if (ah->ah_flags & IB_AH_GRH) {
637 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) {
638 printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
639 ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1);
640 return -1;
641 }
642
643 path->grh_mylmc |= 1 << 7;
644 path->mgid_index = ah->grh.sgid_index;
645 path->hop_limit = ah->grh.hop_limit;
646 path->tclass_flowlabel =
647 cpu_to_be32((ah->grh.traffic_class << 20) |
648 (ah->grh.flow_label));
649 memcpy(path->rgid, ah->grh.dgid.raw, 16);
650 }
651
652 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
653 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
654
655 return 0;
656}
657
658int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
659 int attr_mask, struct ib_udata *udata)
660{
661 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
662 struct mlx4_ib_qp *qp = to_mqp(ibqp);
663 struct mlx4_qp_context *context;
664 enum mlx4_qp_optpar optpar = 0;
665 enum ib_qp_state cur_state, new_state;
666 int sqd_event;
667 int err = -EINVAL;
668
669 context = kzalloc(sizeof *context, GFP_KERNEL);
670 if (!context)
671 return -ENOMEM;
672
673 mutex_lock(&qp->mutex);
674
675 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
676 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
677
678 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
679 goto out;
680
681 if ((attr_mask & IB_QP_PKEY_INDEX) &&
682 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
683 goto out;
684 }
685
686 if ((attr_mask & IB_QP_PORT) &&
687 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
688 goto out;
689 }
690
691 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
692 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
693 goto out;
694 }
695
696 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
697 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
698 goto out;
699 }
700
701 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
702 (to_mlx4_st(ibqp->qp_type) << 16));
703 context->flags |= cpu_to_be32(1 << 8); /* DE? */
704
705 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
706 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
707 else {
708 optpar |= MLX4_QP_OPTPAR_PM_STATE;
709 switch (attr->path_mig_state) {
710 case IB_MIG_MIGRATED:
711 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
712 break;
713 case IB_MIG_REARM:
714 context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
715 break;
716 case IB_MIG_ARMED:
717 context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
718 break;
719 }
720 }
721
722 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
723 ibqp->qp_type == IB_QPT_UD)
724 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
725 else if (attr_mask & IB_QP_PATH_MTU) {
726 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
727 printk(KERN_ERR "path MTU (%u) is invalid\n",
728 attr->path_mtu);
729 goto out;
730 }
731 context->mtu_msgmax = (attr->path_mtu << 5) | 31;
732 }
733
734 if (qp->rq.max)
735 context->rq_size_stride = ilog2(qp->rq.max) << 3;
736 context->rq_size_stride |= qp->rq.wqe_shift - 4;
737
738 if (qp->sq.max)
739 context->sq_size_stride = ilog2(qp->sq.max) << 3;
740 context->sq_size_stride |= qp->sq.wqe_shift - 4;
741
742 if (qp->ibqp.uobject)
743 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
744 else
745 context->usr_page = cpu_to_be32(dev->priv_uar.index);
746
747 if (attr_mask & IB_QP_DEST_QPN)
748 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
749
750 if (attr_mask & IB_QP_PORT) {
751 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
752 !(attr_mask & IB_QP_AV)) {
753 mlx4_set_sched(&context->pri_path, attr->port_num);
754 optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
755 }
756 }
757
758 if (attr_mask & IB_QP_PKEY_INDEX) {
759 context->pri_path.pkey_index = attr->pkey_index;
760 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
761 }
762
763 if (attr_mask & IB_QP_RNR_RETRY) {
764 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
765 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
766 }
767
768 if (attr_mask & IB_QP_AV) {
769 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
770 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
771 err = -EINVAL;
772 goto out;
773 }
774
775 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
776 MLX4_QP_OPTPAR_SCHED_QUEUE);
777 }
778
779 if (attr_mask & IB_QP_TIMEOUT) {
780 context->pri_path.ackto = attr->timeout << 3;
781 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
782 }
783
784 if (attr_mask & IB_QP_ALT_PATH) {
785 if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len)
786 goto out;
787
788 if (attr->alt_port_num == 0 ||
789 attr->alt_port_num > dev->dev->caps.num_ports)
790 goto out;
791
792 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
793 attr->alt_port_num))
794 goto out;
795
796 context->alt_path.pkey_index = attr->alt_pkey_index;
797 context->alt_path.ackto = attr->alt_timeout << 3;
798 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
799 }
800
801 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
802 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
803 if (attr_mask & IB_QP_RETRY_CNT) {
804 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
805 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
806 }
807
808 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
809 if (attr->max_rd_atomic)
810 context->params1 |=
811 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
812 optpar |= MLX4_QP_OPTPAR_SRA_MAX;
813 }
814
815 if (attr_mask & IB_QP_SQ_PSN)
816 context->next_send_psn = cpu_to_be32(attr->sq_psn);
817
818 context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
819
820 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
821 if (attr->max_dest_rd_atomic)
822 context->params2 |=
823 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
824 optpar |= MLX4_QP_OPTPAR_RRA_MAX;
825 }
826
827 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
828 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
829 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
830 }
831
832 if (ibqp->srq)
833 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
834
835 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
836 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
837 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
838 }
839 if (attr_mask & IB_QP_RQ_PSN)
840 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
841
842 context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
843
844 if (attr_mask & IB_QP_QKEY) {
845 context->qkey = cpu_to_be32(attr->qkey);
846 optpar |= MLX4_QP_OPTPAR_Q_KEY;
847 }
848
849 if (ibqp->srq)
850 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
851
852 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
853 context->db_rec_addr = cpu_to_be64(qp->db.dma);
854
855 if (cur_state == IB_QPS_INIT &&
856 new_state == IB_QPS_RTR &&
857 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
858 ibqp->qp_type == IB_QPT_UD)) {
859 context->pri_path.sched_queue = (qp->port - 1) << 6;
860 if (is_qp0(dev, qp))
861 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
862 else
863 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
864 }
865
866 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
867 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
868 sqd_event = 1;
869 else
870 sqd_event = 0;
871
872 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
873 to_mlx4_state(new_state), context, optpar,
874 sqd_event, &qp->mqp);
875 if (err)
876 goto out;
877
878 qp->state = new_state;
879
880 if (attr_mask & IB_QP_ACCESS_FLAGS)
881 qp->atomic_rd_en = attr->qp_access_flags;
882 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
883 qp->resp_depth = attr->max_dest_rd_atomic;
884 if (attr_mask & IB_QP_PORT)
885 qp->port = attr->port_num;
886 if (attr_mask & IB_QP_ALT_PATH)
887 qp->alt_port = attr->alt_port_num;
888
889 if (is_sqp(dev, qp))
890 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
891
892 /*
893 * If we moved QP0 to RTR, bring the IB link up; if we moved
894 * QP0 to RESET or ERROR, bring the link back down.
895 */
896 if (is_qp0(dev, qp)) {
897 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
898 init_port(dev, qp->port);
899
900 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
901 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
902 mlx4_CLOSE_PORT(dev->dev, qp->port);
903 }
904
905 /*
906 * If we moved a kernel QP to RESET, clean up all old CQ
907 * entries and reinitialize the QP.
908 */
909 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
910 mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
911 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
912 if (ibqp->send_cq != ibqp->recv_cq)
913 mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
914
915 qp->rq.head = 0;
916 qp->rq.tail = 0;
917 qp->sq.head = 0;
918 qp->sq.tail = 0;
919 *qp->db.db = 0;
920 }
921
922out:
923 mutex_unlock(&qp->mutex);
924 kfree(context);
925 return err;
926}
927
928static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
929 void *wqe)
930{
931 struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
932 struct mlx4_wqe_mlx_seg *mlx = wqe;
933 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
934 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
935 u16 pkey;
936 int send_size;
937 int header_size;
938 int i;
939
940 send_size = 0;
941 for (i = 0; i < wr->num_sge; ++i)
942 send_size += wr->sg_list[i].length;
943
944 ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
945
946 sqp->ud_header.lrh.service_level =
947 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
948 sqp->ud_header.lrh.destination_lid = ah->av.dlid;
949 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
950 if (mlx4_ib_ah_grh_present(ah)) {
951 sqp->ud_header.grh.traffic_class =
952 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
953 sqp->ud_header.grh.flow_label =
954 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
955 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
956 ah->av.gid_index, &sqp->ud_header.grh.source_gid);
957 memcpy(sqp->ud_header.grh.destination_gid.raw,
958 ah->av.dgid, 16);
959 }
960
961 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
962 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
963 (sqp->ud_header.lrh.destination_lid ==
964 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
965 (sqp->ud_header.lrh.service_level << 8));
966 mlx->rlid = sqp->ud_header.lrh.destination_lid;
967
968 switch (wr->opcode) {
969 case IB_WR_SEND:
970 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
971 sqp->ud_header.immediate_present = 0;
972 break;
973 case IB_WR_SEND_WITH_IMM:
974 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
975 sqp->ud_header.immediate_present = 1;
976 sqp->ud_header.immediate_data = wr->imm_data;
977 break;
978 default:
979 return -EINVAL;
980 }
981
982 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
983 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
984 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
985 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
986 if (!sqp->qp.ibqp.qp_num)
987 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
988 else
989 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
990 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
991 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
992 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
993 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
994 sqp->qkey : wr->wr.ud.remote_qkey);
995 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
996
997 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
998
999 if (0) {
1000 printk(KERN_ERR "built UD header of size %d:\n", header_size);
1001 for (i = 0; i < header_size / 4; ++i) {
1002 if (i % 8 == 0)
1003 printk(" [%02x] ", i * 4);
1004 printk(" %08x",
1005 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
1006 if ((i + 1) % 8 == 0)
1007 printk("\n");
1008 }
1009 printk("\n");
1010 }
1011
1012 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1013 memcpy(inl + 1, sqp->header_buf, header_size);
1014
1015 return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1016}
1017
1018static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1019{
1020 unsigned cur;
1021 struct mlx4_ib_cq *cq;
1022
1023 cur = wq->head - wq->tail;
1024 if (likely(cur + nreq < wq->max))
1025 return 0;
1026
1027 cq = to_mcq(ib_cq);
1028 spin_lock(&cq->lock);
1029 cur = wq->head - wq->tail;
1030 spin_unlock(&cq->lock);
1031
1032 return cur + nreq >= wq->max;
1033}
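/*
 * The first head/tail comparison above runs unlocked: head only
 * advances under the WQ lock that our caller already holds.  Only if
 * the queue looks full is tail re-read under the CQ lock, since tail
 * advances as completions are polled under that lock.
 */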
1034
1035int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1036 struct ib_send_wr **bad_wr)
1037{
1038 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1039 void *wqe;
1040 struct mlx4_wqe_ctrl_seg *ctrl;
1041 unsigned long flags;
1042 int nreq;
1043 int err = 0;
1044 int ind;
1045 int size;
1046 int i;
1047
1048 spin_lock_irqsave(&qp->sq.lock, flags);
1049
1050 ind = qp->sq.head;
1051
1052 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1053 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1054 err = -ENOMEM;
1055 *bad_wr = wr;
1056 goto out;
1057 }
1058
1059 if (unlikely(wr->num_sge > qp->sq.max_gs)) {
1060 err = -EINVAL;
1061 *bad_wr = wr;
1062 goto out;
1063 }
1064
1065 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
1066 qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
1067
1068 ctrl->srcrb_flags =
1069 (wr->send_flags & IB_SEND_SIGNALED ?
1070 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
1071 (wr->send_flags & IB_SEND_SOLICITED ?
1072 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
1073 qp->sq_signal_bits;
1074
1075 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1076 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1077 ctrl->imm = wr->imm_data;
1078 else
1079 ctrl->imm = 0;
1080
1081 wqe += sizeof *ctrl;
1082 size = sizeof *ctrl / 16;
1083
1084 switch (ibqp->qp_type) {
1085 case IB_QPT_RC:
1086 case IB_QPT_UC:
1087 switch (wr->opcode) {
1088 case IB_WR_ATOMIC_CMP_AND_SWP:
1089 case IB_WR_ATOMIC_FETCH_AND_ADD:
1090 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
1091 cpu_to_be64(wr->wr.atomic.remote_addr);
1092 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1093 cpu_to_be32(wr->wr.atomic.rkey);
1094 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1095
1096 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1097
1098 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1099 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1100 cpu_to_be64(wr->wr.atomic.swap);
1101 ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
1102 cpu_to_be64(wr->wr.atomic.compare_add);
1103 } else {
1104 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1105 cpu_to_be64(wr->wr.atomic.compare_add);
1106 ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
1107 }
1108
1109 wqe += sizeof (struct mlx4_wqe_atomic_seg);
1110 size += (sizeof (struct mlx4_wqe_raddr_seg) +
1111 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
1112
1113 break;
1114
1115 case IB_WR_RDMA_READ:
1116 case IB_WR_RDMA_WRITE:
1117 case IB_WR_RDMA_WRITE_WITH_IMM:
1118 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
1119 cpu_to_be64(wr->wr.rdma.remote_addr);
1120 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1121 cpu_to_be32(wr->wr.rdma.rkey);
1122 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1123
1124 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1125 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
1126
1127 break;
1128
1129 default:
1130 /* No extra segments required for sends */
1131 break;
1132 }
1133 break;
1134
1135 case IB_QPT_UD:
1136 memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
1137 &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1138 ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
1139 cpu_to_be32(wr->wr.ud.remote_qpn);
1140 ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
1141 cpu_to_be32(wr->wr.ud.remote_qkey);
1142
1143 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1144 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
1145 break;
1146
1147 case IB_QPT_SMI:
1148 case IB_QPT_GSI:
1149 err = build_mlx_header(to_msqp(qp), wr, ctrl);
1150 if (err < 0) {
1151 *bad_wr = wr;
1152 goto out;
1153 }
1154 wqe += err;
1155 size += err / 16;
1156
1157 err = 0;
1158 break;
1159
1160 default:
1161 break;
1162 }
1163
1164 for (i = 0; i < wr->num_sge; ++i) {
1165 ((struct mlx4_wqe_data_seg *) wqe)->byte_count =
1166 cpu_to_be32(wr->sg_list[i].length);
1167 ((struct mlx4_wqe_data_seg *) wqe)->lkey =
1168 cpu_to_be32(wr->sg_list[i].lkey);
1169 ((struct mlx4_wqe_data_seg *) wqe)->addr =
1170 cpu_to_be64(wr->sg_list[i].addr);
1171
1172 wqe += sizeof (struct mlx4_wqe_data_seg);
1173 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1174 }
1175
1176 /* Add one more inline data segment for ICRC for MLX sends */
1177 if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
1178 ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
1179 cpu_to_be32((1 << 31) | 4);
1180 ((u32 *) wqe)[1] = 0;
1181 wqe += sizeof (struct mlx4_wqe_data_seg);
1182 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1183 }
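/*
 * The inline segment just added has bit 31 of byte_count set
 * to mark it inline; its 4 zero bytes reserve room for the
 * ICRC, which the HCA is expected to fill in for MLX sends.
 */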
1184
1185 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
1186 MLX4_WQE_CTRL_FENCE : 0) | size;
1187
1188 /*
1189 * Make sure descriptor is fully written before
1190 * setting ownership bit (because HW can start
1191 * executing as soon as we do).
1192 */
1193 wmb();
1194
1195 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
1196 err = -EINVAL;
1197 goto out;
1198 }
1199
1200 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
1201 (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0);
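/*
 * (ind & qp->sq.max) flips each time the producer index wraps
 * the power-of-two queue, so the ownership bit written above
 * alternates from pass to pass and matches the 1 << 31
 * stamping done when the QP was created.
 */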
1202
1203 ++ind;
1204 }
1205
1206out:
1207 if (likely(nreq)) {
1208 qp->sq.head += nreq;
1209
1210 /*
1211 * Make sure that descriptors are written before
1212 * doorbell record.
1213 */
1214 wmb();
1215
1216 writel(qp->doorbell_qpn,
1217 to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
1218
1219 /*
1220 * Make sure doorbells don't leak out of SQ spinlock
1221 * and reach the HCA out of order.
1222 */
1223 mmiowb();
1224 }
1225
1226 spin_unlock_irqrestore(&qp->sq.lock, flags);
1227
1228 return err;
1229}
1230
1231int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1232 struct ib_recv_wr **bad_wr)
1233{
1234 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1235 struct mlx4_wqe_data_seg *scat;
1236 unsigned long flags;
1237 int err = 0;
1238 int nreq;
1239 int ind;
1240 int i;
1241
1242 spin_lock_irqsave(&qp->rq.lock, flags);
1243
1244 ind = qp->rq.head & (qp->rq.max - 1);
1245
1246 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1247 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1248 err = -ENOMEM;
1249 *bad_wr = wr;
1250 goto out;
1251 }
1252
1253 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1254 err = -EINVAL;
1255 *bad_wr = wr;
1256 goto out;
1257 }
1258
1259 scat = get_recv_wqe(qp, ind);
1260
1261 for (i = 0; i < wr->num_sge; ++i) {
1262 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
1263 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
1264 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
1265 }
1266
1267 if (i < qp->rq.max_gs) {
1268 scat[i].byte_count = 0;
1269 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
1270 scat[i].addr = 0;
1271 }
1272
1273 qp->rq.wrid[ind] = wr->wr_id;
1274
1275 ind = (ind + 1) & (qp->rq.max - 1);
1276 }
1277
1278out:
1279 if (likely(nreq)) {
1280 qp->rq.head += nreq;
1281
1282 /*
1283 * Make sure that descriptors are written before
1284 * doorbell record.
1285 */
1286 wmb();
1287
1288 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1289 }
1290
1291 spin_unlock_irqrestore(&qp->rq.lock, flags);
1292
1293 return err;
1294}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
new file mode 100644
index 000000000000..42ab4a801d6a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -0,0 +1,334 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx4/qp.h>
34#include <linux/mlx4/srq.h>
35
36#include "mlx4_ib.h"
37#include "user.h"
38
39static void *get_wqe(struct mlx4_ib_srq *srq, int n)
40{
41 int offset = n << srq->msrq.wqe_shift;
42
43 if (srq->buf.nbufs == 1)
44 return srq->buf.u.direct.buf + offset;
45 else
46 return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
47 (offset & (PAGE_SIZE - 1));
48}
49
50static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
51{
52 struct ib_event event;
53 struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
54
55 if (ibsrq->event_handler) {
56 event.device = ibsrq->device;
57 event.element.srq = ibsrq;
58 switch (type) {
59 case MLX4_EVENT_TYPE_SRQ_LIMIT:
60 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
61 break;
62 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
63 event.event = IB_EVENT_SRQ_ERR;
64 break;
65 default:
66 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
67 "on SRQ %06x\n", type, srq->srqn);
68 return;
69 }
70
71 ibsrq->event_handler(&event, ibsrq->srq_context);
72 }
73}
74
75struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
76 struct ib_srq_init_attr *init_attr,
77 struct ib_udata *udata)
78{
79 struct mlx4_ib_dev *dev = to_mdev(pd->device);
80 struct mlx4_ib_srq *srq;
81 struct mlx4_wqe_srq_next_seg *next;
82 int desc_size;
83 int buf_size;
84 int err;
85 int i;
86
87 /* Sanity check SRQ size before proceeding */
88 if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
89 init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
90 return ERR_PTR(-EINVAL);
91
92 srq = kmalloc(sizeof *srq, GFP_KERNEL);
93 if (!srq)
94 return ERR_PTR(-ENOMEM);
95
96 mutex_init(&srq->mutex);
97 spin_lock_init(&srq->lock);
98 srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
99 srq->msrq.max_gs = init_attr->attr.max_sge;
100
101 desc_size = max(32UL,
102 roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
103 srq->msrq.max_gs *
104 sizeof (struct mlx4_wqe_data_seg)));
105 srq->msrq.wqe_shift = ilog2(desc_size);
106
107 buf_size = srq->msrq.max * desc_size;
108
109 if (pd->uobject) {
110 struct mlx4_ib_create_srq ucmd;
111
112 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
113 err = -EFAULT;
114 goto err_srq;
115 }
116
117 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
118 buf_size, 0);
119 if (IS_ERR(srq->umem)) {
120 err = PTR_ERR(srq->umem);
121 goto err_srq;
122 }
123
124 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
125 ilog2(srq->umem->page_size), &srq->mtt);
126 if (err)
127 goto err_buf;
128
129 err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
130 if (err)
131 goto err_mtt;
132
133 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
134 ucmd.db_addr, &srq->db);
135 if (err)
136 goto err_mtt;
137 } else {
138 err = mlx4_ib_db_alloc(dev, &srq->db, 0);
139 if (err)
140 goto err_srq;
141
142 *srq->db.db = 0;
143
144 if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
145 err = -ENOMEM;
146 goto err_db;
147 }
148
149 srq->head = 0;
150 srq->tail = srq->msrq.max - 1;
151 srq->wqe_ctr = 0;
152
153 for (i = 0; i < srq->msrq.max; ++i) {
154 next = get_wqe(srq, i);
155 next->next_wqe_index =
156 cpu_to_be16((i + 1) & (srq->msrq.max - 1));
157 }
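/*
 * The WQEs now form a circular singly-linked free list: entry i
 * points at entry (i + 1) mod max.  Receives are taken from head
 * and freed WQEs are relinked at tail in mlx4_ib_free_srq_wqe().
 */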
158
159 err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
160 &srq->mtt);
161 if (err)
162 goto err_buf;
163
164 err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
165 if (err)
166 goto err_mtt;
167
168 srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
169 if (!srq->wrid) {
170 err = -ENOMEM;
171 goto err_mtt;
172 }
173 }
174
175 err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
176 srq->db.dma, &srq->msrq);
177 if (err)
178 goto err_wrid;
179
180 srq->msrq.event = mlx4_ib_srq_event;
181
182 if (pd->uobject)
183 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
184 err = -EFAULT;
185 goto err_wrid;
186 }
187
188 init_attr->attr.max_wr = srq->msrq.max - 1;
189
190 return &srq->ibsrq;
191
192err_wrid:
193 if (pd->uobject)
194 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
195 else
196 kfree(srq->wrid);
197
198err_mtt:
199 mlx4_mtt_cleanup(dev->dev, &srq->mtt);
200
201err_buf:
202 if (pd->uobject)
203 ib_umem_release(srq->umem);
204 else
205 mlx4_buf_free(dev->dev, buf_size, &srq->buf);
206
207err_db:
208 if (!pd->uobject)
209 mlx4_ib_db_free(dev, &srq->db);
210
211err_srq:
212 kfree(srq);
213
214 return ERR_PTR(err);
215}
216
217int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
218 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
219{
220 struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
221 struct mlx4_ib_srq *srq = to_msrq(ibsrq);
222 int ret;
223
224 /* We don't support resizing SRQs (yet?) */
225 if (attr_mask & IB_SRQ_MAX_WR)
226 return -EINVAL;
227
228 if (attr_mask & IB_SRQ_LIMIT) {
229 if (attr->srq_limit >= srq->msrq.max)
230 return -EINVAL;
231
232 mutex_lock(&srq->mutex);
233 ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
234 mutex_unlock(&srq->mutex);
235
236 if (ret)
237 return ret;
238 }
239
240 return 0;
241}
242
243int mlx4_ib_destroy_srq(struct ib_srq *srq)
244{
245 struct mlx4_ib_dev *dev = to_mdev(srq->device);
246 struct mlx4_ib_srq *msrq = to_msrq(srq);
247
248 mlx4_srq_free(dev->dev, &msrq->msrq);
249 mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
250
251 if (srq->uobject) {
252 mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
253 ib_umem_release(msrq->umem);
254 } else {
255 kfree(msrq->wrid);
256 mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
257 &msrq->buf);
258 mlx4_ib_db_free(dev, &msrq->db);
259 }
260
261 kfree(msrq);
262
263 return 0;
264}
265
266void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
267{
268 struct mlx4_wqe_srq_next_seg *next;
269
270 /* always called with interrupts disabled. */
271 spin_lock(&srq->lock);
272
273 next = get_wqe(srq, srq->tail);
274 next->next_wqe_index = cpu_to_be16(wqe_index);
275 srq->tail = wqe_index;
276
277 spin_unlock(&srq->lock);
278}
279
280int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
281 struct ib_recv_wr **bad_wr)
282{
283 struct mlx4_ib_srq *srq = to_msrq(ibsrq);
284 struct mlx4_wqe_srq_next_seg *next;
285 struct mlx4_wqe_data_seg *scat;
286 unsigned long flags;
287 int err = 0;
288 int nreq;
289 int i;
290
291 spin_lock_irqsave(&srq->lock, flags);
292
293 for (nreq = 0; wr; ++nreq, wr = wr->next) {
294 if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
295 err = -EINVAL;
296 *bad_wr = wr;
297 break;
298 }
299
300 srq->wrid[srq->head] = wr->wr_id;
301
302 next = get_wqe(srq, srq->head);
303 srq->head = be16_to_cpu(next->next_wqe_index);
304 scat = (struct mlx4_wqe_data_seg *) (next + 1);
305
306 for (i = 0; i < wr->num_sge; ++i) {
307 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
308 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
309 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
310 }
311
312 if (i < srq->msrq.max_gs) {
313 scat[i].byte_count = 0;
314 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
315 scat[i].addr = 0;
316 }
317 }
318
319 if (likely(nreq)) {
320 srq->wqe_ctr += nreq;
321
322 /*
323 * Make sure that descriptors are written before
324 * doorbell record.
325 */
326 wmb();
327
328 *srq->db.db = cpu_to_be32(srq->wqe_ctr);
329 }
330
331 spin_unlock_irqrestore(&srq->lock, flags);
332
333 return err;
334}
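
/*
 * Standalone sketch of the free-WQE chain used above (userspace toy, not
 * kernel code; MAX_WQES stands in for srq->msrq.max).  The free list is
 * threaded through the WQEs themselves via next_wqe_index:
 * post_srq_recv() takes WQEs from 'head', and free_srq_wqe() appends
 * completed ones back at 'tail'.
 */
#include <stdio.h>

#define MAX_WQES 8  /* must be a power of 2, like srq->msrq.max */

static int next_index[MAX_WQES];  /* models each WQE's next_wqe_index */
static int head, tail;

static void srq_init(void)
{
        int i;

        head = 0;
        tail = MAX_WQES - 1;
        for (i = 0; i < MAX_WQES; ++i)
                next_index[i] = (i + 1) & (MAX_WQES - 1);
}

static int srq_post(void)  /* take a free WQE, as in mlx4_ib_post_srq_recv() */
{
        int wqe = head;

        head = next_index[wqe];
        return wqe;
}

static void srq_free(int wqe)  /* return a WQE, as in mlx4_ib_free_srq_wqe() */
{
        next_index[tail] = wqe;
        tail = wqe;
}

int main(void)
{
        int a, b;

        srq_init();
        a = srq_post();
        b = srq_post();
        printf("posted WQE %d, then WQE %d\n", a, b);
        srq_free(a);
        printf("WQE %d is back on the free list; tail is now %d\n", a, tail);
        return 0;
}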
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
new file mode 100644
index 000000000000..5b8eddc9fa83
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX4_IB_USER_H
34#define MLX4_IB_USER_H
35
36#include <linux/types.h>
37
38/*
39 * Increment this value if any changes that break userspace ABI
40 * compatibility are made.
41 */
42#define MLX4_IB_UVERBS_ABI_VERSION 1
43
44/*
45 * Make sure that all structs defined in this file remain laid out so
46 * that they pack the same way on 32-bit and 64-bit architectures (to
47 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
48 * In particular do not use pointer types -- pass pointers in __u64
49 * instead.
50 */
51
52struct mlx4_ib_alloc_ucontext_resp {
53 __u32 qp_tab_size;
54 __u16 bf_reg_size;
55 __u16 bf_regs_per_page;
56};
57
58struct mlx4_ib_alloc_pd_resp {
59 __u32 pdn;
60 __u32 reserved;
61};
62
63struct mlx4_ib_create_cq {
64 __u64 buf_addr;
65 __u64 db_addr;
66};
67
68struct mlx4_ib_create_cq_resp {
69 __u32 cqn;
70 __u32 reserved;
71};
72
73struct mlx4_ib_resize_cq {
74 __u64 buf_addr;
75};
76
77struct mlx4_ib_create_srq {
78 __u64 buf_addr;
79 __u64 db_addr;
80};
81
82struct mlx4_ib_create_srq_resp {
83 __u32 srqn;
84 __u32 reserved;
85};
86
87struct mlx4_ib_create_qp {
88 __u64 buf_addr;
89 __u64 db_addr;
90};
91
92#endif /* MLX4_IB_USER_H */
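
/*
 * Userspace sketch of the layout rule in the comment above (hypothetical
 * snippet, not part of the driver): buffer addresses are widened to
 * __u64 instead of being passed as pointers, so the command struct is
 * the same size on 32-bit and 64-bit builds.
 */
#include <stdint.h>
#include <stdio.h>

struct create_srq_cmd {           /* mirrors struct mlx4_ib_create_srq */
        uint64_t buf_addr;
        uint64_t db_addr;
};

int main(void)
{
        char buf[4096], db[8];
        struct create_srq_cmd cmd = {
                .buf_addr = (uintptr_t) buf,  /* pointer widened to 64 bits */
                .db_addr  = (uintptr_t) db,
        };

        printf("sizeof(cmd) = %zu on every architecture; buf passed as 0x%llx\n",
               sizeof cmd, (unsigned long long) cmd.buf_addr);
        return 0;
}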
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 1c05486c3c68..6bcde1cb9688 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 #include <linux/mm.h>
 
@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
 		return ERR_PTR(err);
 	}
 
+	mr->umem = NULL;
+
 	return &mr->ibmr;
 }
 
@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
 	}
 
 	kfree(page_list);
+	mr->umem = NULL;
+
 	return &mr->ibmr;
 }
 
-static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				       int acc, struct ib_udata *udata)
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				       u64 virt, int acc, struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(pd->device);
 	struct ib_umem_chunk *chunk;
@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	int err = 0;
 	int write_mtt_size;
 
-	shift = ffs(region->page_size) - 1;
-
 	mr = kmalloc(sizeof *mr, GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
+	mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(mr->umem)) {
+		err = PTR_ERR(mr->umem);
+		goto err;
+	}
+
+	shift = ffs(mr->umem->page_size) - 1;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mr->umem->chunk_list, list)
 		n += chunk->nents;
 
 	mr->mtt = mthca_alloc_mtt(dev, n);
 	if (IS_ERR(mr->mtt)) {
 		err = PTR_ERR(mr->mtt);
-		goto err;
+		goto err_umem;
 	}
 
 	pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
 	write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
 
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mr->umem->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
 			for (k = 0; k < len; ++k) {
 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
-					region->page_size * k;
+					mr->umem->page_size * k;
 				/*
 				 * Be friendly to write_mtt and pass it chunks
 				 * of appropriate size.
@@ -1071,8 +1082,8 @@ mtt_done:
 	if (err)
 		goto err_mtt;
 
-	err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
-			     region->length, convert_access(acc), mr);
+	err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+			     convert_access(acc), mr);
 
 	if (err)
 		goto err_mtt;
@@ -1082,6 +1093,9 @@ mtt_done:
 err_mtt:
 	mthca_free_mtt(dev, mr->mtt);
 
+err_umem:
+	ib_umem_release(mr->umem);
+
 err:
 	kfree(mr);
 	return ERR_PTR(err);
@@ -1090,8 +1104,12 @@ err:
 static int mthca_dereg_mr(struct ib_mr *mr)
 {
 	struct mthca_mr *mmr = to_mmr(mr);
+
 	mthca_free_mr(to_mdev(mr->device), mmr);
+	if (mmr->umem)
+		ib_umem_release(mmr->umem);
 	kfree(mmr);
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1d266ac2e094..262616c8ebb6 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -73,6 +73,7 @@ struct mthca_mtt;
 
 struct mthca_mr {
 	struct ib_mr      ibmr;
+	struct ib_umem   *umem;
 	struct mthca_mtt *mtt;
 };
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d9842d8544dc..fa489b10c38c 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2493,6 +2493,20 @@ config PASEMI_MAC
 	  This driver supports the on-chip 1/10Gbit Ethernet controller on
 	  PA Semi's PWRficient line of chips.
 
+config MLX4_CORE
+	tristate
+	depends on PCI
+	default n
+
+config MLX4_DEBUG
+	bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED)
+	default y
+	---help---
+	  This option causes debugging code to be compiled into the
+	  mlx4_core driver.  The output can be turned on via the
+	  debug_level module parameter (which can also be set after
+	  the driver is loaded through sysfs).
+
 endmenu
 
 source "drivers/net/tokenring/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index c5d8423573b6..a77affa4f6e6 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -197,6 +197,7 @@ obj-$(CONFIG_SMC911X) += smc911x.o
 obj-$(CONFIG_DM9000) += dm9000.o
 obj-$(CONFIG_FEC_8XX) += fec_8xx/
 obj-$(CONFIG_PASEMI_MAC) += pasemi_mac.o
+obj-$(CONFIG_MLX4_CORE) += mlx4/
 
 obj-$(CONFIG_MACB) += macb.o
 
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
new file mode 100644
index 000000000000..0952a6528f58
--- /dev/null
+++ b/drivers/net/mlx4/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
2
3mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
4 mr.o pd.o profile.o qp.o reset.o srq.o
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
new file mode 100644
index 000000000000..9ffdb9d29da9
--- /dev/null
+++ b/drivers/net/mlx4/alloc.c
@@ -0,0 +1,179 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/errno.h>
34#include <linux/slab.h>
35#include <linux/bitmap.h>
36
37#include "mlx4.h"
38
39u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
40{
41 u32 obj;
42
43 spin_lock(&bitmap->lock);
44
45 obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
46 if (obj >= bitmap->max) {
47 bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
48 obj = find_first_zero_bit(bitmap->table, bitmap->max);
49 }
50
51 if (obj < bitmap->max) {
52 set_bit(obj, bitmap->table);
53 obj |= bitmap->top;
54 bitmap->last = obj + 1;
55 } else
56 obj = -1;
57
58 spin_unlock(&bitmap->lock);
59
60 return obj;
61}
62
63void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
64{
65 obj &= bitmap->max - 1;
66
67 spin_lock(&bitmap->lock);
68 clear_bit(obj, bitmap->table);
69 bitmap->last = min(bitmap->last, obj);
70 bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
71 spin_unlock(&bitmap->lock);
72}
73
74int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved)
75{
76 int i;
77
78 /* num must be a power of 2 */
79 if (num != roundup_pow_of_two(num))
80 return -EINVAL;
81
82 bitmap->last = 0;
83 bitmap->top = 0;
84 bitmap->max = num;
85 bitmap->mask = mask;
86 spin_lock_init(&bitmap->lock);
87 bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL);
88 if (!bitmap->table)
89 return -ENOMEM;
90
91 for (i = 0; i < reserved; ++i)
92 set_bit(i, bitmap->table);
93
94 return 0;
95}
96
97void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
98{
99 kfree(bitmap->table);
100}
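
/*
 * Toy userspace model of the round-robin allocator above (not kernel
 * code; NUM and MASK stand in for bitmap->max and bitmap->mask).  The
 * search starts at 'last' so slots are handed out round-robin, and
 * 'top' is ORed into the result so a freed-and-reallocated slot comes
 * back with a different object number until the generation bits wrap.
 */
#include <stdio.h>

#define NUM  4              /* table size, must be a power of 2 */
#define MASK (2 * NUM - 1)  /* one extra high bit acts as a generation counter */

static unsigned char table[NUM];
static unsigned last, top;

static int toy_alloc(void)
{
        unsigned obj;

        for (obj = last; obj < NUM && table[obj]; ++obj)
                ;
        if (obj >= NUM) {                  /* wrapped: bump the generation bits */
                top = (top + NUM) & MASK;
                for (obj = 0; obj < NUM && table[obj]; ++obj)
                        ;
        }
        if (obj >= NUM)
                return -1;                 /* table full */
        table[obj] = 1;
        last = obj + 1;                    /* (kept in-range in this sketch) */
        return obj | top;
}

static void toy_free(int obj)
{
        obj &= NUM - 1;                    /* strip the generation bits */
        table[obj] = 0;
        if ((unsigned) obj < last)
                last = obj;
        top = (top + NUM) & MASK;          /* mirrors the driver's free path */
}

int main(void)
{
        int a = toy_alloc(), b = toy_alloc();

        toy_free(a);
        printf("a=%d b=%d, slot 0 comes back as %d\n", a, b, toy_alloc());
        return 0;
}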
101
102/*
103 * Handling for queue buffers -- we allocate a bunch of memory and
104 * register it in a memory region at HCA virtual address 0. If the
105 * requested size is > max_direct, we split the allocation into
106 * multiple pages, so we don't require too much contiguous memory.
107 */
108
109int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
110 struct mlx4_buf *buf)
111{
112 dma_addr_t t;
113
114 if (size <= max_direct) {
115 buf->nbufs = 1;
116 buf->npages = 1;
117 buf->page_shift = get_order(size) + PAGE_SHIFT;
118 buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
119 size, &t, GFP_KERNEL);
120 if (!buf->u.direct.buf)
121 return -ENOMEM;
122
123 buf->u.direct.map = t;
124
125 while (t & ((1 << buf->page_shift) - 1)) {
126 --buf->page_shift;
127 buf->npages *= 2;
128 }
129
130 memset(buf->u.direct.buf, 0, size);
131 } else {
132 int i;
133
134 buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
135 buf->npages = buf->nbufs;
136 buf->page_shift = PAGE_SHIFT;
137 buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list,
138 GFP_KERNEL);
139 if (!buf->u.page_list)
140 return -ENOMEM;
141
142 for (i = 0; i < buf->nbufs; ++i) {
143 buf->u.page_list[i].buf =
144 dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
145 &t, GFP_KERNEL);
146 if (!buf->u.page_list[i].buf)
147 goto err_free;
148
149 buf->u.page_list[i].map = t;
150
151 memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
152 }
153 }
154
155 return 0;
156
157err_free:
158 mlx4_buf_free(dev, size, buf);
159
160 return -ENOMEM;
161}
162EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
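
/*
 * Userspace sketch of the direct-buffer sizing logic in mlx4_buf_alloc()
 * above (toy code, assuming 4 KB pages): the buffer is described to the
 * HCA as 2^page_shift-byte pages, and page_shift is lowered until the
 * DMA address is aligned to that page size, doubling npages each time.
 */
#include <stdio.h>

int main(void)
{
        unsigned long t = 0x1000;  /* pretend DMA address, only 4 KB aligned */
        int page_shift = 14;       /* get_order(16384) + PAGE_SHIFT = 2 + 12 */
        int npages = 1;

        while (t & ((1ul << page_shift) - 1)) {
                --page_shift;
                npages *= 2;
        }

        printf("page_shift=%d npages=%d\n", page_shift, npages); /* 12 and 4 */
        return 0;
}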
163
164void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
165{
166 int i;
167
168 if (buf->nbufs == 1)
169 dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
170 buf->u.direct.map);
171 else {
172 for (i = 0; i < buf->nbufs; ++i)
173 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
174 buf->u.page_list[i].buf,
175 buf->u.page_list[i].map);
176 kfree(buf->u.page_list);
177 }
178}
179EXPORT_SYMBOL_GPL(mlx4_buf_free);
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
new file mode 100644
index 000000000000..1bb088aeaf71
--- /dev/null
+++ b/drivers/net/mlx4/catas.c
@@ -0,0 +1,70 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx4.h"
34
35void mlx4_handle_catas_err(struct mlx4_dev *dev)
36{
37 struct mlx4_priv *priv = mlx4_priv(dev);
38
39 int i;
40
41 mlx4_err(dev, "Catastrophic error detected:\n");
42 for (i = 0; i < priv->fw.catas_size; ++i)
43 mlx4_err(dev, " buf[%02x]: %08x\n",
44 i, swab32(readl(priv->catas_err.map + i)));
45
46 mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
47}
48
49void mlx4_map_catas_buf(struct mlx4_dev *dev)
50{
51 struct mlx4_priv *priv = mlx4_priv(dev);
52 unsigned long addr;
53
54 addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
55 priv->fw.catas_offset;
56
57 priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
58 if (!priv->catas_err.map)
59 mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n",
60 addr);
61
62}
63
64void mlx4_unmap_catas_buf(struct mlx4_dev *dev)
65{
66 struct mlx4_priv *priv = mlx4_priv(dev);
67
68 if (priv->catas_err.map)
69 iounmap(priv->catas_err.map);
70}
diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
new file mode 100644
index 000000000000..c1f81a993f5d
--- /dev/null
+++ b/drivers/net/mlx4/cmd.c
@@ -0,0 +1,429 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <linux/sched.h>
36#include <linux/pci.h>
37#include <linux/errno.h>
38
39#include <linux/mlx4/cmd.h>
40
41#include <asm/io.h>
42
43#include "mlx4.h"
44
45#define CMD_POLL_TOKEN 0xffff
46
47enum {
48 /* command completed successfully: */
49 CMD_STAT_OK = 0x00,
50 /* Internal error (such as a bus error) occurred while processing command: */
51 CMD_STAT_INTERNAL_ERR = 0x01,
52 /* Operation/command not supported or opcode modifier not supported: */
53 CMD_STAT_BAD_OP = 0x02,
54 /* Parameter not supported or parameter out of range: */
55 CMD_STAT_BAD_PARAM = 0x03,
56 /* System not enabled or bad system state: */
57 CMD_STAT_BAD_SYS_STATE = 0x04,
 58	/* Attempt to access reserved or unallocated resource: */
59 CMD_STAT_BAD_RESOURCE = 0x05,
60 /* Requested resource is currently executing a command, or is otherwise busy: */
61 CMD_STAT_RESOURCE_BUSY = 0x06,
62 /* Required capability exceeds device limits: */
63 CMD_STAT_EXCEED_LIM = 0x08,
64 /* Resource is not in the appropriate state or ownership: */
65 CMD_STAT_BAD_RES_STATE = 0x09,
66 /* Index out of range: */
67 CMD_STAT_BAD_INDEX = 0x0a,
68 /* FW image corrupted: */
69 CMD_STAT_BAD_NVMEM = 0x0b,
70 /* Attempt to modify a QP/EE which is not in the presumed state: */
71 CMD_STAT_BAD_QP_STATE = 0x10,
72 /* Bad segment parameters (Address/Size): */
73 CMD_STAT_BAD_SEG_PARAM = 0x20,
 74	/* Memory Region has Memory Windows bound to it: */
75 CMD_STAT_REG_BOUND = 0x21,
76 /* HCA local attached memory not present: */
77 CMD_STAT_LAM_NOT_PRE = 0x22,
78 /* Bad management packet (silently discarded): */
79 CMD_STAT_BAD_PKT = 0x30,
80 /* More outstanding CQEs in CQ than new CQ size: */
81 CMD_STAT_BAD_SIZE = 0x40
82};
83
84enum {
85 HCR_IN_PARAM_OFFSET = 0x00,
86 HCR_IN_MODIFIER_OFFSET = 0x08,
87 HCR_OUT_PARAM_OFFSET = 0x0c,
88 HCR_TOKEN_OFFSET = 0x14,
89 HCR_STATUS_OFFSET = 0x18,
90
91 HCR_OPMOD_SHIFT = 12,
92 HCR_T_BIT = 21,
93 HCR_E_BIT = 22,
94 HCR_GO_BIT = 23
95};
96
97enum {
98 GO_BIT_TIMEOUT = 10000
99};
100
101struct mlx4_cmd_context {
102 struct completion done;
103 int result;
104 int next;
105 u64 out_param;
106 u16 token;
107};
108
109static int mlx4_status_to_errno(u8 status)
{
110 static const int trans_table[] = {
111 [CMD_STAT_INTERNAL_ERR] = -EIO,
112 [CMD_STAT_BAD_OP] = -EPERM,
113 [CMD_STAT_BAD_PARAM] = -EINVAL,
114 [CMD_STAT_BAD_SYS_STATE] = -ENXIO,
115 [CMD_STAT_BAD_RESOURCE] = -EBADF,
116 [CMD_STAT_RESOURCE_BUSY] = -EBUSY,
117 [CMD_STAT_EXCEED_LIM] = -ENOMEM,
118 [CMD_STAT_BAD_RES_STATE] = -EBADF,
119 [CMD_STAT_BAD_INDEX] = -EBADF,
120 [CMD_STAT_BAD_NVMEM] = -EFAULT,
121 [CMD_STAT_BAD_QP_STATE] = -EINVAL,
122 [CMD_STAT_BAD_SEG_PARAM] = -EFAULT,
123 [CMD_STAT_REG_BOUND] = -EBUSY,
124 [CMD_STAT_LAM_NOT_PRE] = -EAGAIN,
125 [CMD_STAT_BAD_PKT] = -EINVAL,
126 [CMD_STAT_BAD_SIZE] = -ENOMEM,
127 };
128
129 if (status >= ARRAY_SIZE(trans_table) ||
130 (status != CMD_STAT_OK && trans_table[status] == 0))
131 return -EIO;
132
133 return trans_table[status];
134}
135
136static int cmd_pending(struct mlx4_dev *dev)
137{
138 u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
139
140 return (status & swab32(1 << HCR_GO_BIT)) ||
141 (mlx4_priv(dev)->cmd.toggle ==
142 !!(status & swab32(1 << HCR_T_BIT)));
143}
144
145static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
146 u32 in_modifier, u8 op_modifier, u16 op, u16 token,
147 int event)
148{
149 struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
150 u32 __iomem *hcr = cmd->hcr;
151 int ret = -EAGAIN;
152 unsigned long end;
153
154 mutex_lock(&cmd->hcr_mutex);
155
156 end = jiffies;
157 if (event)
158 end += HZ * 10;
159
160 while (cmd_pending(dev)) {
161 if (time_after_eq(jiffies, end))
162 goto out;
163 cond_resched();
164 }
165
166 /*
167 * We use writel (instead of something like memcpy_toio)
168 * because writes of less than 32 bits to the HCR don't work
169 * (and some architectures such as ia64 implement memcpy_toio
170 * in terms of writeb).
171 */
172 __raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0);
173 __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1);
174 __raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2);
175 __raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3);
176 __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4);
177 __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5);
178
179 /* __raw_writel may not order writes. */
180 wmb();
181
182 __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
183 (cmd->toggle << HCR_T_BIT) |
184 (event ? (1 << HCR_E_BIT) : 0) |
185 (op_modifier << HCR_OPMOD_SHIFT) |
186 op), hcr + 6);
187 cmd->toggle = cmd->toggle ^ 1;
188
189 ret = 0;
190
191out:
192 mutex_unlock(&cmd->hcr_mutex);
193 return ret;
194}
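
/*
 * Userspace sketch of the HCR posting pattern above (not kernel code):
 * each 64-bit parameter is emitted as two 32-bit words, high half first,
 * because the HCR only accepts aligned 32-bit writes; in the driver each
 * word additionally goes through cpu_to_be32() before __raw_writel().
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t in_param = 0x1122334455667788ull;
        uint32_t word[2];                 /* stands in for hcr + 0 and hcr + 1 */

        word[0] = (uint32_t) (in_param >> 32);          /* 0x11223344 */
        word[1] = (uint32_t) (in_param & 0xffffffffu);  /* 0x55667788 */

        printf("hcr+0 <- %08x, hcr+1 <- %08x\n", word[0], word[1]);
        return 0;
}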
195
196static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
197 int out_is_imm, u32 in_modifier, u8 op_modifier,
198 u16 op, unsigned long timeout)
199{
200 struct mlx4_priv *priv = mlx4_priv(dev);
201 void __iomem *hcr = priv->cmd.hcr;
202 int err = 0;
203 unsigned long end;
204
205 down(&priv->cmd.poll_sem);
206
207 err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
208 in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
209 if (err)
210 goto out;
211
212 end = msecs_to_jiffies(timeout) + jiffies;
213 while (cmd_pending(dev) && time_before(jiffies, end))
214 cond_resched();
215
216 if (cmd_pending(dev)) {
217 err = -ETIMEDOUT;
218 goto out;
219 }
220
221 if (out_is_imm)
222 *out_param =
223 (u64) be32_to_cpu((__force __be32)
224 __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
225 (u64) be32_to_cpu((__force __be32)
226 __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4));
227
228 err = mlx4_status_to_errno(be32_to_cpu((__force __be32)
229 __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24);
230
231out:
232 up(&priv->cmd.poll_sem);
233 return err;
234}
235
236void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param)
237{
238 struct mlx4_priv *priv = mlx4_priv(dev);
239 struct mlx4_cmd_context *context =
240 &priv->cmd.context[token & priv->cmd.token_mask];
241
242 /* previously timed out command completing at long last */
243 if (token != context->token)
244 return;
245
246 context->result = mlx4_status_to_errno(status);
247 context->out_param = out_param;
248
249 context->token += priv->cmd.token_mask + 1;
250
251 complete(&context->done);
252}
253
254static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
255 int out_is_imm, u32 in_modifier, u8 op_modifier,
256 u16 op, unsigned long timeout)
257{
258 struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
259 struct mlx4_cmd_context *context;
260 int err = 0;
261
262 down(&cmd->event_sem);
263
264 spin_lock(&cmd->context_lock);
265 BUG_ON(cmd->free_head < 0);
266 context = &cmd->context[cmd->free_head];
267 cmd->free_head = context->next;
268 spin_unlock(&cmd->context_lock);
269
270 init_completion(&context->done);
271
272 mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
273 in_modifier, op_modifier, op, context->token, 1);
274
275 if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) {
276 err = -EBUSY;
277 goto out;
278 }
279
280 err = context->result;
281 if (err)
282 goto out;
283
284 if (out_is_imm)
285 *out_param = context->out_param;
286
287out:
288 spin_lock(&cmd->context_lock);
289 context->next = cmd->free_head;
290 cmd->free_head = context - cmd->context;
291 spin_unlock(&cmd->context_lock);
292
293 up(&cmd->event_sem);
294 return err;
295}
296
297int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
298 int out_is_imm, u32 in_modifier, u8 op_modifier,
299 u16 op, unsigned long timeout)
300{
301 if (mlx4_priv(dev)->cmd.use_events)
302 return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
303 in_modifier, op_modifier, op, timeout);
304 else
305 return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm,
306 in_modifier, op_modifier, op, timeout);
307}
308EXPORT_SYMBOL_GPL(__mlx4_cmd);
309
310int mlx4_cmd_init(struct mlx4_dev *dev)
311{
312 struct mlx4_priv *priv = mlx4_priv(dev);
313
314 mutex_init(&priv->cmd.hcr_mutex);
315 sema_init(&priv->cmd.poll_sem, 1);
316 priv->cmd.use_events = 0;
317 priv->cmd.toggle = 1;
318
319 priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE,
320 MLX4_HCR_SIZE);
321 if (!priv->cmd.hcr) {
 322		mlx4_err(dev, "Couldn't map command register.\n");
323 return -ENOMEM;
324 }
325
326 priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
327 MLX4_MAILBOX_SIZE,
328 MLX4_MAILBOX_SIZE, 0);
329 if (!priv->cmd.pool) {
330 iounmap(priv->cmd.hcr);
331 return -ENOMEM;
332 }
333
334 return 0;
335}
336
337void mlx4_cmd_cleanup(struct mlx4_dev *dev)
338{
339 struct mlx4_priv *priv = mlx4_priv(dev);
340
341 pci_pool_destroy(priv->cmd.pool);
342 iounmap(priv->cmd.hcr);
343}
344
345/*
346 * Switch to using events to issue FW commands (can only be called
347 * after event queue for command events has been initialized).
348 */
349int mlx4_cmd_use_events(struct mlx4_dev *dev)
350{
351 struct mlx4_priv *priv = mlx4_priv(dev);
352 int i;
353
354 priv->cmd.context = kmalloc(priv->cmd.max_cmds *
355 sizeof (struct mlx4_cmd_context),
356 GFP_KERNEL);
357 if (!priv->cmd.context)
358 return -ENOMEM;
359
360 for (i = 0; i < priv->cmd.max_cmds; ++i) {
361 priv->cmd.context[i].token = i;
362 priv->cmd.context[i].next = i + 1;
363 }
364
365 priv->cmd.context[priv->cmd.max_cmds - 1].next = -1;
366 priv->cmd.free_head = 0;
367
368 sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
369 spin_lock_init(&priv->cmd.context_lock);
370
371 for (priv->cmd.token_mask = 1;
372 priv->cmd.token_mask < priv->cmd.max_cmds;
373 priv->cmd.token_mask <<= 1)
374 ; /* nothing */
375 --priv->cmd.token_mask;
376
377 priv->cmd.use_events = 1;
378
379 down(&priv->cmd.poll_sem);
380
381 return 0;
382}
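
/*
 * Userspace sketch of the token-mask computation above (toy code):
 * token_mask becomes the smallest power of two >= max_cmds, minus one,
 * so (token & token_mask) always lands on a valid context slot, while
 * the bits above the mask act as a generation count (mlx4_cmd_event()
 * advances each slot's token by token_mask + 1 on completion, letting a
 * late completion of a timed-out command be recognized and dropped).
 */
#include <stdio.h>

int main(void)
{
        int max_cmds = 10;  /* pretend priv->cmd.max_cmds */
        int token_mask;

        for (token_mask = 1; token_mask < max_cmds; token_mask <<= 1)
                ; /* nothing */
        --token_mask;

        printf("max_cmds=%d -> token_mask=0x%x\n", max_cmds, token_mask); /* 0xf */
        return 0;
}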
383
384/*
385 * Switch back to polling (used when shutting down the device)
386 */
387void mlx4_cmd_use_polling(struct mlx4_dev *dev)
388{
389 struct mlx4_priv *priv = mlx4_priv(dev);
390 int i;
391
392 priv->cmd.use_events = 0;
393
394 for (i = 0; i < priv->cmd.max_cmds; ++i)
395 down(&priv->cmd.event_sem);
396
397 kfree(priv->cmd.context);
398
399 up(&priv->cmd.poll_sem);
400}
401
402struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
403{
404 struct mlx4_cmd_mailbox *mailbox;
405
406 mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL);
407 if (!mailbox)
408 return ERR_PTR(-ENOMEM);
409
410 mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
411 &mailbox->dma);
412 if (!mailbox->buf) {
413 kfree(mailbox);
414 return ERR_PTR(-ENOMEM);
415 }
416
417 return mailbox;
418}
419EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
420
421void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox)
422{
423 if (!mailbox)
424 return;
425
426 pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma);
427 kfree(mailbox);
428}
429EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox);
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
new file mode 100644
index 000000000000..437d78ad0912
--- /dev/null
+++ b/drivers/net/mlx4/cq.c
@@ -0,0 +1,254 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#include <linux/init.h>
38#include <linux/hardirq.h>
39
40#include <linux/mlx4/cmd.h>
41
42#include "mlx4.h"
43#include "icm.h"
44
45struct mlx4_cq_context {
46 __be32 flags;
47 u16 reserved1[3];
48 __be16 page_offset;
49 __be32 logsize_usrpage;
50 u8 reserved2;
51 u8 cq_period;
52 u8 reserved3;
53 u8 cq_max_count;
54 u8 reserved4[3];
55 u8 comp_eqn;
56 u8 log_page_size;
57 u8 reserved5[2];
58 u8 mtt_base_addr_h;
59 __be32 mtt_base_addr_l;
60 __be32 last_notified_index;
61 __be32 solicit_producer_index;
62 __be32 consumer_index;
63 __be32 producer_index;
 64	u32			reserved6[2];
65 __be64 db_rec_addr;
66};
67
68#define MLX4_CQ_STATUS_OK ( 0 << 28)
69#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28)
70#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28)
71#define MLX4_CQ_FLAG_CC ( 1 << 18)
72#define MLX4_CQ_FLAG_OI ( 1 << 17)
73#define MLX4_CQ_STATE_ARMED ( 9 << 8)
74#define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8)
75#define MLX4_EQ_STATE_FIRED (10 << 8)
76
77void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
78{
79 struct mlx4_cq *cq;
80
81 cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
82 cqn & (dev->caps.num_cqs - 1));
83 if (!cq) {
84 mlx4_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
85 return;
86 }
87
88 ++cq->arm_sn;
89
90 cq->comp(cq);
91}
92
93void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
94{
95 struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
96 struct mlx4_cq *cq;
97
98 spin_lock(&cq_table->lock);
99
100 cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
101 if (cq)
102 atomic_inc(&cq->refcount);
103
104 spin_unlock(&cq_table->lock);
105
106 if (!cq) {
107 mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
108 return;
109 }
110
111 cq->event(cq, event_type);
112
113 if (atomic_dec_and_test(&cq->refcount))
114 complete(&cq->free);
115}
116
117static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
118 int cq_num)
119{
120 return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
121 MLX4_CMD_TIME_CLASS_A);
122}
123
124static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
125 int cq_num)
126{
127 return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
128 mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
129 MLX4_CMD_TIME_CLASS_A);
130}
131
132int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
133 struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq)
134{
135 struct mlx4_priv *priv = mlx4_priv(dev);
136 struct mlx4_cq_table *cq_table = &priv->cq_table;
137 struct mlx4_cmd_mailbox *mailbox;
138 struct mlx4_cq_context *cq_context;
139 u64 mtt_addr;
140 int err;
141
142 cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
143 if (cq->cqn == -1)
144 return -ENOMEM;
145
146 err = mlx4_table_get(dev, &cq_table->table, cq->cqn);
147 if (err)
148 goto err_out;
149
150 err = mlx4_table_get(dev, &cq_table->cmpt_table, cq->cqn);
151 if (err)
152 goto err_put;
153
154 spin_lock_irq(&cq_table->lock);
155 err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
156 spin_unlock_irq(&cq_table->lock);
157 if (err)
158 goto err_cmpt_put;
159
160 mailbox = mlx4_alloc_cmd_mailbox(dev);
161 if (IS_ERR(mailbox)) {
162 err = PTR_ERR(mailbox);
163 goto err_radix;
164 }
165
166 cq_context = mailbox->buf;
167 memset(cq_context, 0, sizeof *cq_context);
168
169 cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
170 cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
171 cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
172
173 mtt_addr = mlx4_mtt_addr(dev, mtt);
174 cq_context->mtt_base_addr_h = mtt_addr >> 32;
175 cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
176 cq_context->db_rec_addr = cpu_to_be64(db_rec);
177
178 err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
179 mlx4_free_cmd_mailbox(dev, mailbox);
180 if (err)
181 goto err_radix;
182
183 cq->cons_index = 0;
184 cq->arm_sn = 1;
185 cq->uar = uar;
186 atomic_set(&cq->refcount, 1);
187 init_completion(&cq->free);
188
189 return 0;
190
191err_radix:
192 spin_lock_irq(&cq_table->lock);
193 radix_tree_delete(&cq_table->tree, cq->cqn);
194 spin_unlock_irq(&cq_table->lock);
195
196err_cmpt_put:
197 mlx4_table_put(dev, &cq_table->cmpt_table, cq->cqn);
198
199err_put:
200 mlx4_table_put(dev, &cq_table->table, cq->cqn);
201
202err_out:
203 mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
204
205 return err;
206}
207EXPORT_SYMBOL_GPL(mlx4_cq_alloc);
208
209void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
210{
211 struct mlx4_priv *priv = mlx4_priv(dev);
212 struct mlx4_cq_table *cq_table = &priv->cq_table;
213 int err;
214
215 err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn);
216 if (err)
217 mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
218
219 synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
220
221 spin_lock_irq(&cq_table->lock);
222 radix_tree_delete(&cq_table->tree, cq->cqn);
223 spin_unlock_irq(&cq_table->lock);
224
225 if (atomic_dec_and_test(&cq->refcount))
226 complete(&cq->free);
227 wait_for_completion(&cq->free);
228
229 mlx4_table_put(dev, &cq_table->table, cq->cqn);
230 mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
231}
232EXPORT_SYMBOL_GPL(mlx4_cq_free);
233
234int __devinit mlx4_init_cq_table(struct mlx4_dev *dev)
235{
236 struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
237 int err;
238
239 spin_lock_init(&cq_table->lock);
240 INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
241
242 err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
243 dev->caps.num_cqs - 1, dev->caps.reserved_cqs);
244 if (err)
245 return err;
246
247 return 0;
248}
249
250void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
251{
252 /* Nothing to do to clean up radix_tree */
253 mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
254}
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
new file mode 100644
index 000000000000..acf1c801a1b8
--- /dev/null
+++ b/drivers/net/mlx4/eq.c
@@ -0,0 +1,696 @@
1/*
2 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/init.h>
35#include <linux/interrupt.h>
36
37#include <linux/mlx4/cmd.h>
38
39#include "mlx4.h"
40#include "fw.h"
41
42enum {
43 MLX4_NUM_ASYNC_EQE = 0x100,
44 MLX4_NUM_SPARE_EQE = 0x80,
45 MLX4_EQ_ENTRY_SIZE = 0x20
46};
47
48/*
 49 * No packing needed: 64-bit quantities such as the MTT base address are
 50 * split into 32-bit halves (mtt_base_addr_h/_l).
50 */
51struct mlx4_eq_context {
52 __be32 flags;
53 u16 reserved1[3];
54 __be16 page_offset;
55 u8 log_eq_size;
56 u8 reserved2[4];
57 u8 eq_period;
58 u8 reserved3;
59 u8 eq_max_count;
60 u8 reserved4[3];
61 u8 intr;
62 u8 log_page_size;
63 u8 reserved5[2];
64 u8 mtt_base_addr_h;
65 __be32 mtt_base_addr_l;
66 u32 reserved6[2];
67 __be32 consumer_index;
68 __be32 producer_index;
69 u32 reserved7[4];
70};
71
72#define MLX4_EQ_STATUS_OK ( 0 << 28)
73#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28)
74#define MLX4_EQ_OWNER_SW ( 0 << 24)
75#define MLX4_EQ_OWNER_HW ( 1 << 24)
76#define MLX4_EQ_FLAG_EC ( 1 << 18)
77#define MLX4_EQ_FLAG_OI ( 1 << 17)
78#define MLX4_EQ_STATE_ARMED ( 9 << 8)
79#define MLX4_EQ_STATE_FIRED (10 << 8)
80#define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8)
81
82#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \
83 (1ull << MLX4_EVENT_TYPE_COMM_EST) | \
84 (1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \
85 (1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \
86 (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \
87 (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \
88 (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \
89 (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
90 (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \
91 (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
92 (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \
93 (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \
94 (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
95 (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
96 (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
97 (1ull << MLX4_EVENT_TYPE_CMD))
98#define MLX4_CATAS_EVENT_MASK (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)
99
100struct mlx4_eqe {
101 u8 reserved1;
102 u8 type;
103 u8 reserved2;
104 u8 subtype;
105 union {
106 u32 raw[6];
107 struct {
108 __be32 cqn;
109 } __attribute__((packed)) comp;
110 struct {
111 u16 reserved1;
112 __be16 token;
113 u32 reserved2;
114 u8 reserved3[3];
115 u8 status;
116 __be64 out_param;
117 } __attribute__((packed)) cmd;
118 struct {
119 __be32 qpn;
120 } __attribute__((packed)) qp;
121 struct {
122 __be32 srqn;
123 } __attribute__((packed)) srq;
124 struct {
125 __be32 cqn;
126 u32 reserved1;
127 u8 reserved2[3];
128 u8 syndrome;
129 } __attribute__((packed)) cq_err;
130 struct {
131 u32 reserved1[2];
132 __be32 port;
133 } __attribute__((packed)) port_change;
134 } event;
135 u8 reserved3[3];
136 u8 owner;
137} __attribute__((packed));
138
139static void eq_set_ci(struct mlx4_eq *eq, int req_not)
140{
141 __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
142 req_not << 31),
143 eq->doorbell);
144 /* We still want ordering, just not swabbing, so add a barrier */
145 mb();
146}
147
148static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
149{
150 unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
151 return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
152}
153
154static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
155{
156 struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
157 return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
158}
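
/*
 * Toy model of the ownership test in next_eqe_sw() above (userspace, not
 * kernel code).  The consumer index counts forever, so (cons_index & nent)
 * toggles once per lap around the ring; an EQE is consumable only while
 * bit 7 of its owner byte matches that lap bit, which (assuming the HCA
 * writes the owner bit to match its own lap parity) makes entries left
 * over from the previous lap fail the test without any explicit clearing.
 */
#include <stdio.h>

#define NENT 4  /* ring size, power of 2, stands in for eq->nent */

static int sw_owns(unsigned char owner, unsigned cons_index)
{
        return !(!!(owner & 0x80) ^ !!(cons_index & NENT));
}

int main(void)
{
        printf("lap 0, owner 0x00: %d (fresh entry, consumable)\n", sw_owns(0x00, 1));
        printf("lap 1, owner 0x00: %d (stale entry from lap 0)\n", sw_owns(0x00, NENT + 1));
        printf("lap 1, owner 0x80: %d (fresh entry, consumable)\n", sw_owns(0x80, NENT + 1));
        return 0;
}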
159
160static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
161{
162 struct mlx4_eqe *eqe;
163 int cqn;
164 int eqes_found = 0;
165 int set_ci = 0;
166
167 while ((eqe = next_eqe_sw(eq))) {
168 /*
169 * Make sure we read EQ entry contents after we've
170 * checked the ownership bit.
171 */
172 rmb();
173
174 switch (eqe->type) {
175 case MLX4_EVENT_TYPE_COMP:
176 cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
177 mlx4_cq_completion(dev, cqn);
178 break;
179
180 case MLX4_EVENT_TYPE_PATH_MIG:
181 case MLX4_EVENT_TYPE_COMM_EST:
182 case MLX4_EVENT_TYPE_SQ_DRAINED:
183 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
184 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
185 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
186 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
187 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
188 mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
189 eqe->type);
190 break;
191
192 case MLX4_EVENT_TYPE_SRQ_LIMIT:
193 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
194 mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
195 eqe->type);
196 break;
197
198 case MLX4_EVENT_TYPE_CMD:
199 mlx4_cmd_event(dev,
200 be16_to_cpu(eqe->event.cmd.token),
201 eqe->event.cmd.status,
202 be64_to_cpu(eqe->event.cmd.out_param));
203 break;
204
205 case MLX4_EVENT_TYPE_PORT_CHANGE:
206 mlx4_dispatch_event(dev, eqe->type, eqe->subtype,
207 be32_to_cpu(eqe->event.port_change.port) >> 28);
208 break;
209
210 case MLX4_EVENT_TYPE_CQ_ERROR:
211 mlx4_warn(dev, "CQ %s on CQN %06x\n",
212 eqe->event.cq_err.syndrome == 1 ?
213 "overrun" : "access violation",
214 be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
215 mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
216 eqe->type);
217 break;
218
219 case MLX4_EVENT_TYPE_EQ_OVERFLOW:
220 mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
221 break;
222
223 case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
224 case MLX4_EVENT_TYPE_ECC_DETECT:
225 default:
226 mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
227 eqe->type, eqe->subtype, eq->eqn, eq->cons_index);
228 break;
 229		}
230
231 ++eq->cons_index;
232 eqes_found = 1;
233 ++set_ci;
234
235 /*
236 * The HCA will think the queue has overflowed if we
237 * don't tell it we've been processing events. We
238 * create our EQs with MLX4_NUM_SPARE_EQE extra
239 * entries, so we must update our consumer index at
240 * least that often.
241 */
242 if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
243 /*
 244			 * It is fine to update the consumer index here,
 245			 * because this is a rare case, not the fast path.
246 */
247 eq_set_ci(eq, 0);
248 set_ci = 0;
249 }
250 }
251
252 eq_set_ci(eq, 1);
253
254 return eqes_found;
255}
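
/*
 * Sketch of the consumer-index update cadence in mlx4_eq_int() above
 * (toy userspace code): the EQ is created with MLX4_NUM_SPARE_EQE extra
 * entries, so as long as the driver rings the CI doorbell at least once
 * every MLX4_NUM_SPARE_EQE events, the HCA never sees the ring as full.
 */
#include <stdio.h>

#define NUM_SPARE_EQE 0x80

int main(void)
{
        int set_ci = 0, doorbells = 0, events = 300;

        for (int i = 0; i < events; ++i) {
                ++set_ci;                        /* one event consumed */
                if (set_ci >= NUM_SPARE_EQE) {   /* mirrors the unlikely() branch */
                        ++doorbells;             /* eq_set_ci(eq, 0) */
                        set_ci = 0;
                }
        }
        ++doorbells;                             /* final eq_set_ci(eq, 1) */
        printf("%d events -> %d doorbell updates\n", events, doorbells);
        return 0;
}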
256
257static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
258{
259 struct mlx4_dev *dev = dev_ptr;
260 struct mlx4_priv *priv = mlx4_priv(dev);
261 int work = 0;
262 int i;
263
264 writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
265
266 for (i = 0; i < MLX4_EQ_CATAS; ++i)
267 work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
268
269 return IRQ_RETVAL(work);
270}
271
272static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
273{
274 struct mlx4_eq *eq = eq_ptr;
275 struct mlx4_dev *dev = eq->dev;
276
277 mlx4_eq_int(dev, eq);
278
279 /* MSI-X vectors always belong to us */
280 return IRQ_HANDLED;
281}
282
283static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr)
284{
285 mlx4_handle_catas_err(dev_ptr);
286
287 /* MSI-X vectors always belong to us */
288 return IRQ_HANDLED;
289}
290
291static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
292 int eq_num)
293{
294 return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
295 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
296}
297
298static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
299 int eq_num)
300{
301 return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ,
302 MLX4_CMD_TIME_CLASS_A);
303}
304
305static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
306 int eq_num)
307{
308 return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ,
309 MLX4_CMD_TIME_CLASS_A);
310}
311
312static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev,
313 struct mlx4_eq *eq)
314{
315 struct mlx4_priv *priv = mlx4_priv(dev);
316 int index;
317
318 index = eq->eqn / 4 - dev->caps.reserved_eqs / 4;
319
320 if (!priv->eq_table.uar_map[index]) {
321 priv->eq_table.uar_map[index] =
322 ioremap(pci_resource_start(dev->pdev, 2) +
323 ((eq->eqn / 4) << PAGE_SHIFT),
324 PAGE_SIZE);
325 if (!priv->eq_table.uar_map[index]) {
326 mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
327 eq->eqn);
328 return NULL;
329 }
330 }
331
332 return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
333}
334
335static int __devinit mlx4_create_eq(struct mlx4_dev *dev, int nent,
336 u8 intr, struct mlx4_eq *eq)
337{
338 struct mlx4_priv *priv = mlx4_priv(dev);
339 struct mlx4_cmd_mailbox *mailbox;
340 struct mlx4_eq_context *eq_context;
341 int npages;
342 u64 *dma_list = NULL;
343 dma_addr_t t;
344 u64 mtt_addr;
345 int err = -ENOMEM;
346 int i;
347
348 eq->dev = dev;
349 eq->nent = roundup_pow_of_two(max(nent, 2));
350 npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
351
352 eq->page_list = kmalloc(npages * sizeof *eq->page_list,
353 GFP_KERNEL);
354 if (!eq->page_list)
355 goto err_out;
356
357 for (i = 0; i < npages; ++i)
358 eq->page_list[i].buf = NULL;
359
360 dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
361 if (!dma_list)
362 goto err_out_free;
363
364 mailbox = mlx4_alloc_cmd_mailbox(dev);
365 if (IS_ERR(mailbox))
366 goto err_out_free;
367 eq_context = mailbox->buf;
368
369 for (i = 0; i < npages; ++i) {
370 eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
371 PAGE_SIZE, &t, GFP_KERNEL);
372 if (!eq->page_list[i].buf)
373 goto err_out_free_pages;
374
375 dma_list[i] = t;
376 eq->page_list[i].map = t;
377
378 memset(eq->page_list[i].buf, 0, PAGE_SIZE);
379 }
380
381 eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
382 if (eq->eqn == -1)
383 goto err_out_free_pages;
384
385 eq->doorbell = mlx4_get_eq_uar(dev, eq);
386 if (!eq->doorbell) {
387 err = -ENOMEM;
388 goto err_out_free_eq;
389 }
390
391 err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt);
392 if (err)
393 goto err_out_free_eq;
394
395 err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list);
396 if (err)
397 goto err_out_free_mtt;
398
399 memset(eq_context, 0, sizeof *eq_context);
400 eq_context->flags = cpu_to_be32(MLX4_EQ_STATUS_OK |
401 MLX4_EQ_STATE_ARMED);
402 eq_context->log_eq_size = ilog2(eq->nent);
403 eq_context->intr = intr;
404 eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT;
405
406 mtt_addr = mlx4_mtt_addr(dev, &eq->mtt);
407 eq_context->mtt_base_addr_h = mtt_addr >> 32;
408 eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
409
410 err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn);
411 if (err) {
412 mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err);
413 goto err_out_free_mtt;
414 }
415
416 kfree(dma_list);
417 mlx4_free_cmd_mailbox(dev, mailbox);
418
419 eq->cons_index = 0;
420
421 return err;
422
423err_out_free_mtt:
424 mlx4_mtt_cleanup(dev, &eq->mtt);
425
426err_out_free_eq:
427 mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
428
429err_out_free_pages:
430 for (i = 0; i < npages; ++i)
431 if (eq->page_list[i].buf)
432 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
433 eq->page_list[i].buf,
434 eq->page_list[i].map);
435
436 mlx4_free_cmd_mailbox(dev, mailbox);
437
438err_out_free:
439 kfree(eq->page_list);
440 kfree(dma_list);
441
442err_out:
443 return err;
444}
445
446static void mlx4_free_eq(struct mlx4_dev *dev,
447 struct mlx4_eq *eq)
448{
449 struct mlx4_priv *priv = mlx4_priv(dev);
450 struct mlx4_cmd_mailbox *mailbox;
451 int err;
452 int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
453 int i;
454
455 mailbox = mlx4_alloc_cmd_mailbox(dev);
456 if (IS_ERR(mailbox))
457 return;
458
459 err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn);
460 if (err)
461 mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err);
462
463 if (0) {
464 mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
465 for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
466 if (i % 4 == 0)
467 printk("[%02x] ", i * 4);
468 printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
469 if ((i + 1) % 4 == 0)
470 printk("\n");
471 }
472 }
473
474 mlx4_mtt_cleanup(dev, &eq->mtt);
475 for (i = 0; i < npages; ++i)
476 pci_free_consistent(dev->pdev, PAGE_SIZE,
477 eq->page_list[i].buf,
478 eq->page_list[i].map);
479
480 kfree(eq->page_list);
481 mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
482 mlx4_free_cmd_mailbox(dev, mailbox);
483}
484
485static void mlx4_free_irqs(struct mlx4_dev *dev)
486{
487 struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
488 int i;
489
490 if (eq_table->have_irq)
491 free_irq(dev->pdev->irq, dev);
492 for (i = 0; i < MLX4_NUM_EQ; ++i)
493 if (eq_table->eq[i].have_irq)
494 free_irq(eq_table->eq[i].irq, eq_table->eq + i);
495}
496
497static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev)
498{
499 struct mlx4_priv *priv = mlx4_priv(dev);
500
501 priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
502 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
503 if (!priv->clr_base) {
504 mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
505 return -ENOMEM;
506 }
507
508 return 0;
509}
510
511static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
512{
513 struct mlx4_priv *priv = mlx4_priv(dev);
514
515 iounmap(priv->clr_base);
516}
517
518int __devinit mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
519{
520 struct mlx4_priv *priv = mlx4_priv(dev);
521 int ret;
522
523 /*
524 * We assume that mapping one page is enough for the whole EQ
525 * context table. This is fine with all current HCAs, because
526 * we only use 32 EQs and each EQ uses 64 bytes of context
 527	 * memory, or 2 KB total.
528 */
529 priv->eq_table.icm_virt = icm_virt;
530 priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
531 if (!priv->eq_table.icm_page)
532 return -ENOMEM;
533 priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
534 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
535 if (pci_dma_mapping_error(priv->eq_table.icm_dma)) {
536 __free_page(priv->eq_table.icm_page);
537 return -ENOMEM;
538 }
539
540 ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
541 if (ret) {
542 pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
543 PCI_DMA_BIDIRECTIONAL);
544 __free_page(priv->eq_table.icm_page);
545 }
546
547 return ret;
548}
549
550void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
551{
552 struct mlx4_priv *priv = mlx4_priv(dev);
553
554 mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
555 pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
556 PCI_DMA_BIDIRECTIONAL);
557 __free_page(priv->eq_table.icm_page);
558}
559
560int __devinit mlx4_init_eq_table(struct mlx4_dev *dev)
561{
562 struct mlx4_priv *priv = mlx4_priv(dev);
563 int err;
564 int i;
565
566 err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
567 dev->caps.num_eqs - 1, dev->caps.reserved_eqs);
568 if (err)
569 return err;
570
571 for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
572 priv->eq_table.uar_map[i] = NULL;
573
574 err = mlx4_map_clr_int(dev);
575 if (err)
576 goto err_out_free;
577
578 priv->eq_table.clr_mask =
579 swab32(1 << (priv->eq_table.inta_pin & 31));
580 priv->eq_table.clr_int = priv->clr_base +
581 (priv->eq_table.inta_pin < 32 ? 4 : 0);
582
583 err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
584 (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
585 &priv->eq_table.eq[MLX4_EQ_COMP]);
586 if (err)
587 goto err_out_unmap;
588
589 err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
590 (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
591 &priv->eq_table.eq[MLX4_EQ_ASYNC]);
592 if (err)
593 goto err_out_comp;
594
595 if (dev->flags & MLX4_FLAG_MSI_X) {
596 static const char *eq_name[] = {
597 [MLX4_EQ_COMP] = DRV_NAME " (comp)",
598 [MLX4_EQ_ASYNC] = DRV_NAME " (async)",
599 [MLX4_EQ_CATAS] = DRV_NAME " (catas)"
600 };
601
602 err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS,
603 &priv->eq_table.eq[MLX4_EQ_CATAS]);
604 if (err)
605 goto err_out_async;
606
607 for (i = 0; i < MLX4_EQ_CATAS; ++i) {
608 err = request_irq(priv->eq_table.eq[i].irq,
609 mlx4_msi_x_interrupt,
610 0, eq_name[i], priv->eq_table.eq + i);
611 if (err)
612 goto err_out_catas;
613
614 priv->eq_table.eq[i].have_irq = 1;
615 }
616
617 err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq,
618 mlx4_catas_interrupt, 0,
619 eq_name[MLX4_EQ_CATAS], dev);
620 if (err)
621 goto err_out_catas;
622
623 priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1;
624 } else {
625 err = request_irq(dev->pdev->irq, mlx4_interrupt,
 626				  IRQF_SHARED, DRV_NAME, dev);
627 if (err)
628 goto err_out_async;
629
630 priv->eq_table.have_irq = 1;
631 }
632
633 err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
634 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
635 if (err)
636 mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
637 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
638
639 for (i = 0; i < MLX4_EQ_CATAS; ++i)
640 eq_set_ci(&priv->eq_table.eq[i], 1);
641
642 if (dev->flags & MLX4_FLAG_MSI_X) {
643 err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0,
644 priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
645 if (err)
646 mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n",
647 priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err);
648 }
649
650 return 0;
651
652err_out_catas:
653 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
654
655err_out_async:
656 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
657
658err_out_comp:
659 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
660
661err_out_unmap:
662 mlx4_unmap_clr_int(dev);
663 mlx4_free_irqs(dev);
664
665err_out_free:
666 mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
667 return err;
668}
669
670void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
671{
672 struct mlx4_priv *priv = mlx4_priv(dev);
673 int i;
674
675 if (dev->flags & MLX4_FLAG_MSI_X)
676 mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1,
677 priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
678
679 mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
680 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
681
682 mlx4_free_irqs(dev);
683
684 for (i = 0; i < MLX4_EQ_CATAS; ++i)
685 mlx4_free_eq(dev, &priv->eq_table.eq[i]);
686 if (dev->flags & MLX4_FLAG_MSI_X)
687 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
688
689 mlx4_unmap_clr_int(dev);
690
691 for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
692 if (priv->eq_table.uar_map[i])
693 iounmap(priv->eq_table.uar_map[i]);
694
695 mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
696}
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
new file mode 100644
index 000000000000..c42717313663
--- /dev/null
+++ b/drivers/net/mlx4/fw.c
@@ -0,0 +1,775 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <linux/mlx4/cmd.h>
36
37#include "fw.h"
38#include "icm.h"
39
40extern void __buggy_use_of_MLX4_GET(void);
41extern void __buggy_use_of_MLX4_PUT(void);
42
43#define MLX4_GET(dest, source, offset) \
44 do { \
45 void *__p = (char *) (source) + (offset); \
46 switch (sizeof (dest)) { \
47 case 1: (dest) = *(u8 *) __p; break; \
48 case 2: (dest) = be16_to_cpup(__p); break; \
49 case 4: (dest) = be32_to_cpup(__p); break; \
50 case 8: (dest) = be64_to_cpup(__p); break; \
51 default: __buggy_use_of_MLX4_GET(); \
52 } \
53 } while (0)
54
55#define MLX4_PUT(dest, source, offset) \
56 do { \
57 void *__d = ((char *) (dest) + (offset)); \
58 switch (sizeof(source)) { \
59 case 1: *(u8 *) __d = (source); break; \
60 case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
61 case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
62 case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
63 default: __buggy_use_of_MLX4_PUT(); \
64 } \
65 } while (0)
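
The sizeof-based dispatch above turns misuse into a link-time failure: the default case calls a function that is declared but never defined, so an unsupported operand size leaves an unresolved symbol instead of silently corrupting data at run time. A minimal standalone sketch of the idiom (names are hypothetical; it relies on the compiler folding the constant switch, which holds at the optimization levels the kernel builds with):

/* GET32() links cleanly for a 4-byte destination; any other size
 * leaves an unresolved reference to __buggy_use_of_GET32(). */
extern void __buggy_use_of_GET32(void);

#define GET32(dest, src)						\
	do {								\
		switch (sizeof (dest)) {				\
		case 4: (dest) = *(const unsigned int *) (src); break;	\
		default: __buggy_use_of_GET32();			\
		}							\
	} while (0)
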
66
67static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
68{
69 static const char *fname[] = {
70 [ 0] = "RC transport",
71 [ 1] = "UC transport",
72 [ 2] = "UD transport",
73 [ 3] = "SRC transport",
74 [ 4] = "reliable multicast",
75 [ 5] = "FCoIB support",
76 [ 6] = "SRQ support",
77 [ 7] = "IPoIB checksum offload",
78 [ 8] = "P_Key violation counter",
79 [ 9] = "Q_Key violation counter",
80 [10] = "VMM",
81 [16] = "MW support",
82 [17] = "APM support",
83 [18] = "Atomic ops support",
84 [19] = "Raw multicast support",
85 [20] = "Address vector port checking support",
86 [21] = "UD multicast support",
87 [24] = "Demand paging support",
88 [25] = "Router support"
89 };
90 int i;
91
92 mlx4_dbg(dev, "DEV_CAP flags:\n");
93 for (i = 0; i < 32; ++i)
94 if (fname[i] && (flags & (1 << i)))
95 mlx4_dbg(dev, " %s\n", fname[i]);
96}
97
98int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
99{
100 struct mlx4_cmd_mailbox *mailbox;
101 u32 *outbox;
102 u8 field;
103 u16 size;
104 u16 stat_rate;
105 int err;
106
107#define QUERY_DEV_CAP_OUT_SIZE 0x100
108#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10
109#define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11
110#define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12
111#define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13
112#define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14
113#define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15
114#define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16
115#define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17
116#define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19
117#define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a
118#define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b
119#define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d
120#define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e
121#define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f
122#define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20
123#define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21
124#define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22
125#define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23
126#define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27
127#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29
128#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b
129#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f
130#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33
131#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35
132#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36
133#define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37
134#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b
135#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c
136#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f
137#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44
138#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48
139#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49
140#define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b
141#define QUERY_DEV_CAP_BF_OFFSET 0x4c
142#define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d
143#define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e
144#define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f
145#define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51
146#define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52
147#define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55
148#define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56
149#define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61
150#define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62
151#define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63
152#define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64
153#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65
154#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
155#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
156#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
157#define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86
158#define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88
159#define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a
160#define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c
161#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e
162#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90
163#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92
164#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x97
165#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
166#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
167
168 mailbox = mlx4_alloc_cmd_mailbox(dev);
169 if (IS_ERR(mailbox))
170 return PTR_ERR(mailbox);
171 outbox = mailbox->buf;
172
173 err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
174 MLX4_CMD_TIME_CLASS_A);
175
176 if (err)
177 goto out;
178
179 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
180 dev_cap->reserved_qps = 1 << (field & 0xf);
181 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
182 dev_cap->max_qps = 1 << (field & 0x1f);
183 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET);
184 dev_cap->reserved_srqs = 1 << (field >> 4);
185 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET);
186 dev_cap->max_srqs = 1 << (field & 0x1f);
187 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET);
188 dev_cap->max_cq_sz = 1 << field;
189 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET);
190 dev_cap->reserved_cqs = 1 << (field & 0xf);
191 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET);
192 dev_cap->max_cqs = 1 << (field & 0x1f);
193 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
194 dev_cap->max_mpts = 1 << (field & 0x3f);
195 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
196 dev_cap->reserved_eqs = 1 << (field & 0xf);
197 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
198 dev_cap->max_eqs = 1 << (field & 0x7);
199 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
200 dev_cap->reserved_mtts = 1 << (field >> 4);
201 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET);
202 dev_cap->max_mrw_sz = 1 << field;
203 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET);
204 dev_cap->reserved_mrws = 1 << (field & 0xf);
205 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET);
206 dev_cap->max_mtt_seg = 1 << (field & 0x3f);
207 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
208 dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
209 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
210 dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
211 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
212 dev_cap->max_rdma_global = 1 << (field & 0x3f);
213 MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
214 dev_cap->local_ca_ack_delay = field & 0x1f;
215 MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET);
216 dev_cap->max_mtu = field >> 4;
217 dev_cap->max_port_width = field & 0xf;
218 MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
219 dev_cap->max_vl = field >> 4;
220 dev_cap->num_ports = field & 0xf;
221 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET);
222 dev_cap->max_gids = 1 << (field & 0xf);
223 MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
224 dev_cap->stat_rate_support = stat_rate;
225 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET);
226 dev_cap->max_pkeys = 1 << (field & 0xf);
227 MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
228 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
229 dev_cap->reserved_uars = field >> 4;
230 MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
231 dev_cap->uar_size = 1 << ((field & 0x3f) + 20);
232 MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET);
233 dev_cap->min_page_sz = 1 << field;
234
235 MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET);
236 if (field & 0x80) {
237 MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
238 dev_cap->bf_reg_size = 1 << (field & 0x1f);
239 MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
240 dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
241 mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
242 dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
243 } else {
244 dev_cap->bf_reg_size = 0;
245 mlx4_dbg(dev, "BlueFlame not available\n");
246 }
247
248 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET);
249 dev_cap->max_sq_sg = field;
250 MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
251 dev_cap->max_sq_desc_sz = size;
252
253 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET);
254 dev_cap->max_qp_per_mcg = 1 << field;
255 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET);
256 dev_cap->reserved_mgms = field & 0xf;
257 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET);
258 dev_cap->max_mcgs = 1 << field;
259 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET);
260 dev_cap->reserved_pds = field >> 4;
261 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
262 dev_cap->max_pds = 1 << (field & 0x3f);
263
264 MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
265 dev_cap->rdmarc_entry_sz = size;
266 MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET);
267 dev_cap->qpc_entry_sz = size;
268 MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET);
269 dev_cap->aux_entry_sz = size;
270 MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET);
271 dev_cap->altc_entry_sz = size;
272 MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET);
273 dev_cap->eqc_entry_sz = size;
274 MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET);
275 dev_cap->cqc_entry_sz = size;
276 MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET);
277 dev_cap->srq_entry_sz = size;
278 MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET);
279 dev_cap->cmpt_entry_sz = size;
280 MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET);
281 dev_cap->mtt_entry_sz = size;
282 MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET);
283 dev_cap->dmpt_entry_sz = size;
284
285 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET);
286 dev_cap->max_srq_sz = 1 << field;
287 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET);
288 dev_cap->max_qp_sz = 1 << field;
289 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET);
290 dev_cap->resize_srq = field & 1;
291 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET);
292 dev_cap->max_rq_sg = field;
293 MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
294 dev_cap->max_rq_desc_sz = size;
295
296 MLX4_GET(dev_cap->bmme_flags, outbox,
297 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
298 MLX4_GET(dev_cap->reserved_lkey, outbox,
299 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
300 MLX4_GET(dev_cap->max_icm_sz, outbox,
301 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
302
303 if (dev_cap->bmme_flags & 1)
304 mlx4_dbg(dev, "Base MM extensions: yes "
305 "(flags %d, rsvd L_Key %08x)\n",
306 dev_cap->bmme_flags, dev_cap->reserved_lkey);
307 else
308 mlx4_dbg(dev, "Base MM extensions: no\n");
309
310 /*
311 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
312 * we can't use any EQs whose doorbell falls on that page,
313 * even if the EQ itself isn't reserved.
314 */
315 dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
316 dev_cap->reserved_eqs);
317
318 mlx4_dbg(dev, "Max ICM size %lld MB\n",
319 (unsigned long long) dev_cap->max_icm_sz >> 20);
320 mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
321 dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz);
322 mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n",
323 dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz);
324 mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
325 dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz);
326 mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
327 dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz);
328 mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
329 dev_cap->reserved_mrws, dev_cap->reserved_mtts);
330 mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
331 dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars);
332 mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
333 dev_cap->max_qp_per_mcg, dev_cap->reserved_mgms);
334 mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
335 dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz);
336 mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n",
337 dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu,
338 dev_cap->max_port_width);
339 mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n",
340 dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
341 mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
342 dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
343
344 dump_dev_cap_flags(dev, dev_cap->flags);
345
346out:
347 mlx4_free_cmd_mailbox(dev, mailbox);
348 return err;
349}
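
Nearly every capability read above follows one decoding pattern: the firmware stores log2 of the limit in a few bits of a byte, and the driver masks those bits and shifts 1 left. A worked example with a made-up mailbox value:

/* Hypothetical worked example: the low five bits of the MAX_QP byte
 * hold log2(max_qps). */
unsigned char field = 0x18;		/* raw byte from the mailbox */
int max_qps = 1 << (field & 0x1f);	/* 1 << 24 = 16M QPs */
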
350
351int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
352{
353 struct mlx4_cmd_mailbox *mailbox;
354 struct mlx4_icm_iter iter;
355 __be64 *pages;
356 int lg;
357 int nent = 0;
358 int i;
359 int err = 0;
360 int ts = 0, tc = 0;
361
362 mailbox = mlx4_alloc_cmd_mailbox(dev);
363 if (IS_ERR(mailbox))
364 return PTR_ERR(mailbox);
365 memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
366 pages = mailbox->buf;
367
368 for (mlx4_icm_first(icm, &iter);
369 !mlx4_icm_last(&iter);
370 mlx4_icm_next(&iter)) {
371 /*
372 * We have to pass pages that are aligned to their
373 * size, so find the least significant 1 in the
374 * address or size and use that as our log2 size.
375 */
376 lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1;
377 if (lg < MLX4_ICM_PAGE_SHIFT) {
378 mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
379 MLX4_ICM_PAGE_SIZE,
380 (unsigned long long) mlx4_icm_addr(&iter),
381 mlx4_icm_size(&iter));
382 err = -EINVAL;
383 goto out;
384 }
385
386 for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) {
387 if (virt != -1) {
388 pages[nent * 2] = cpu_to_be64(virt);
389 virt += 1 << lg;
390 }
391
392 pages[nent * 2 + 1] =
393 cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) |
394 (lg - MLX4_ICM_PAGE_SHIFT));
395 ts += 1 << (lg - 10);
396 ++tc;
397
398 if (++nent == MLX4_MAILBOX_SIZE / 16) {
399 err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
400 MLX4_CMD_TIME_CLASS_B);
401 if (err)
402 goto out;
403 nent = 0;
404 }
405 }
406 }
407
408 if (nent)
409 err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B);
410 if (err)
411 goto out;
412
413 switch (op) {
414 case MLX4_CMD_MAP_FA:
415 mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
416 break;
417 case MLX4_CMD_MAP_ICM_AUX:
418 mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
419 break;
420 case MLX4_CMD_MAP_ICM:
421 mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
422 tc, ts, (unsigned long long) virt - (ts << 10));
423 break;
424 }
425
426out:
427 mlx4_free_cmd_mailbox(dev, mailbox);
428 return err;
429}
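
The alignment computation above deserves a second look: the firmware requires each mapped block to be aligned to its own size, and the largest power of two that satisfies this for a region is set by the lowest set bit of (address | size). A compilable sketch using a GCC builtin (an assumption on my part; the kernel's ffs() gives the same answer plus one):

/* The largest self-aligned power-of-two block for a region is
 * 1 << block_log2(addr, size). */
static int block_log2(unsigned long addr, unsigned long size)
{
	return __builtin_ctzl(addr | size);	/* == ffs(x) - 1, x != 0 */
}

/* Example: block_log2(0x12340000, 0x30000) == 16, so the 192 KB
 * region is passed to the firmware as three 64 KB blocks. */
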
430
431int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm)
432{
433 return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1);
434}
435
436int mlx4_UNMAP_FA(struct mlx4_dev *dev)
437{
438 return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B);
439}
440
442int mlx4_RUN_FW(struct mlx4_dev *dev)
443{
444 return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A);
445}
446
447int mlx4_QUERY_FW(struct mlx4_dev *dev)
448{
449 struct mlx4_fw *fw = &mlx4_priv(dev)->fw;
450 struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
451 struct mlx4_cmd_mailbox *mailbox;
452 u32 *outbox;
453 int err = 0;
454 u64 fw_ver;
455 u8 lg;
456
457#define QUERY_FW_OUT_SIZE 0x100
458#define QUERY_FW_VER_OFFSET 0x00
459#define QUERY_FW_MAX_CMD_OFFSET 0x0f
460#define QUERY_FW_ERR_START_OFFSET 0x30
461#define QUERY_FW_ERR_SIZE_OFFSET 0x38
462#define QUERY_FW_ERR_BAR_OFFSET 0x3c
463
464#define QUERY_FW_SIZE_OFFSET 0x00
465#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
466#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28
467
468 mailbox = mlx4_alloc_cmd_mailbox(dev);
469 if (IS_ERR(mailbox))
470 return PTR_ERR(mailbox);
471 outbox = mailbox->buf;
472
473 err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
474 MLX4_CMD_TIME_CLASS_A);
475 if (err)
476 goto out;
477
478 MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET);
479 /*
480 * The FW subminor version is at more significant bits than the
481 * minor version, so swap here.
482 */
483 dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) |
484 ((fw_ver & 0xffff0000ull) >> 16) |
485 ((fw_ver & 0x0000ffffull) << 16);
486
487 MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
488 cmd->max_cmds = 1 << lg;
489
490 mlx4_dbg(dev, "FW version %d.%d.%03d, max commands %d\n",
491 (int) (dev->caps.fw_ver >> 32),
492 (int) (dev->caps.fw_ver >> 16) & 0xffff,
493 (int) dev->caps.fw_ver & 0xffff,
494 cmd->max_cmds);
495
496 MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET);
497 MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
498 MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET);
499 fw->catas_bar = (fw->catas_bar >> 6) * 2;
500
501 mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n",
502 (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar);
503
504 MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
505 MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
506 MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET);
507 fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2;
508
509 mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2);
510
511 /*
512 * Round up number of system pages needed in case
513 * MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
514 */
515 fw->fw_pages =
516 ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >>
517 (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT);
518
519 mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n",
520 (unsigned long long) fw->clr_int_base, fw->clr_int_bar);
521
522out:
523 mlx4_free_cmd_mailbox(dev, mailbox);
524 return err;
525}
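
The version fix-up above is easier to follow with a concrete value. A userspace sketch (the raw number is invented):

#include <stdint.h>

/* The firmware packs major.subminor.minor into descending 16-bit
 * fields; the driver wants major.minor.subminor, so swap the low two. */
static uint64_t fixup_fw_ver(uint64_t raw)
{
	return (raw & 0xffff00000000ull) |
	       ((raw & 0x0000ffff0000ull) >> 16) |
	       ((raw & 0x00000000ffffull) << 16);
}

/* fixup_fw_ver(0x000200030005ull) == 0x000200050003ull, which the
 * debug message above prints as "2.5.003". */
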
526
527static void get_board_id(void *vsd, char *board_id)
528{
529 int i;
530
531#define VSD_OFFSET_SIG1 0x00
532#define VSD_OFFSET_SIG2 0xde
533#define VSD_OFFSET_MLX_BOARD_ID 0xd0
534#define VSD_OFFSET_TS_BOARD_ID 0x20
535
536#define VSD_SIGNATURE_TOPSPIN 0x5ad
537
538 memset(board_id, 0, MLX4_BOARD_ID_LEN);
539
540 if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
541 be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
542 strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN);
543 } else {
544 /*
545 * The board ID is a string, but the firmware byte-swaps
546 * each 4-byte word before passing it back to us.
547 * Therefore we need to swab it before printing.
548 */
549 for (i = 0; i < 4; ++i)
550 ((u32 *) board_id)[i] =
551 swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4));
552 }
553}
554
555int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter)
556{
557 struct mlx4_cmd_mailbox *mailbox;
558 u32 *outbox;
559 int err;
560
561#define QUERY_ADAPTER_OUT_SIZE 0x100
562#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00
563#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
564#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
565#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
566#define QUERY_ADAPTER_VSD_OFFSET 0x20
567
568 mailbox = mlx4_alloc_cmd_mailbox(dev);
569 if (IS_ERR(mailbox))
570 return PTR_ERR(mailbox);
571 outbox = mailbox->buf;
572
573 err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
574 MLX4_CMD_TIME_CLASS_A);
575 if (err)
576 goto out;
577
578 MLX4_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
579 MLX4_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
580 MLX4_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
581 MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
582
583 get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
584 adapter->board_id);
585
586out:
587 mlx4_free_cmd_mailbox(dev, mailbox);
588 return err;
589}
590
591int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
592{
593 struct mlx4_cmd_mailbox *mailbox;
594 __be32 *inbox;
595 int err;
596
597#define INIT_HCA_IN_SIZE 0x200
598#define INIT_HCA_VERSION_OFFSET 0x000
599#define INIT_HCA_VERSION 2
600#define INIT_HCA_FLAGS_OFFSET 0x014
601#define INIT_HCA_QPC_OFFSET 0x020
602#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
603#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
604#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
605#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
606#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
607#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
608#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
609#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
610#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
611#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
612#define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
613#define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77)
614#define INIT_HCA_MCAST_OFFSET 0x0c0
615#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
616#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
617#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
618#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
619#define INIT_HCA_TPT_OFFSET 0x0f0
620#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
621#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
622#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
623#define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18)
624#define INIT_HCA_UAR_OFFSET 0x120
625#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
626#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
627
628 mailbox = mlx4_alloc_cmd_mailbox(dev);
629 if (IS_ERR(mailbox))
630 return PTR_ERR(mailbox);
631 inbox = mailbox->buf;
632
633 memset(inbox, 0, INIT_HCA_IN_SIZE);
634
635 *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
636
637#if defined(__LITTLE_ENDIAN)
638 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
639#elif defined(__BIG_ENDIAN)
640 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
641#else
642#error Host endianness not defined
643#endif
644 /* Check port for UD address vector: */
645 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
646
647 /* QPC/EEC/CQC/EQC/RDMARC attributes */
648
649 MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
650 MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
651 MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET);
652 MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET);
653 MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET);
654 MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET);
655 MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET);
656 MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET);
657 MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET);
658 MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET);
659 MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET);
660 MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
661
662 /* multicast attributes */
663
664 MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
665 MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
666 MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
667 MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
668
669 /* TPT attributes */
670
671 MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET);
672 MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
673 MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
674 MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET);
675
676 /* UAR attributes */
677
678 MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
679 MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
680
681 err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 1000);
682
683 if (err)
684 mlx4_err(dev, "INIT_HCA returns %d\n", err);
685
686 mlx4_free_cmd_mailbox(dev, mailbox);
687 return err;
688}
689
690int mlx4_INIT_PORT(struct mlx4_dev *dev, struct mlx4_init_port_param *param, int port)
691{
692 struct mlx4_cmd_mailbox *mailbox;
693 u32 *inbox;
694 int err;
695 u32 flags;
696
697#define INIT_PORT_IN_SIZE 256
698#define INIT_PORT_FLAGS_OFFSET 0x00
699#define INIT_PORT_FLAG_SIG (1 << 18)
700#define INIT_PORT_FLAG_NG (1 << 17)
701#define INIT_PORT_FLAG_G0 (1 << 16)
702#define INIT_PORT_VL_SHIFT 4
703#define INIT_PORT_PORT_WIDTH_SHIFT 8
704#define INIT_PORT_MTU_OFFSET 0x04
705#define INIT_PORT_MAX_GID_OFFSET 0x06
706#define INIT_PORT_MAX_PKEY_OFFSET 0x0a
707#define INIT_PORT_GUID0_OFFSET 0x10
708#define INIT_PORT_NODE_GUID_OFFSET 0x18
709#define INIT_PORT_SI_GUID_OFFSET 0x20
710
711 mailbox = mlx4_alloc_cmd_mailbox(dev);
712 if (IS_ERR(mailbox))
713 return PTR_ERR(mailbox);
714 inbox = mailbox->buf;
715
716 memset(inbox, 0, INIT_PORT_IN_SIZE);
717
718 flags = 0;
719 flags |= param->set_guid0 ? INIT_PORT_FLAG_G0 : 0;
720 flags |= param->set_node_guid ? INIT_PORT_FLAG_NG : 0;
721 flags |= param->set_si_guid ? INIT_PORT_FLAG_SIG : 0;
722 flags |= (param->vl_cap & 0xf) << INIT_PORT_VL_SHIFT;
723 flags |= (param->port_width_cap & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT;
724 MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET);
725
726 MLX4_PUT(inbox, param->mtu, INIT_PORT_MTU_OFFSET);
727 MLX4_PUT(inbox, param->max_gid, INIT_PORT_MAX_GID_OFFSET);
728 MLX4_PUT(inbox, param->max_pkey, INIT_PORT_MAX_PKEY_OFFSET);
729 MLX4_PUT(inbox, param->guid0, INIT_PORT_GUID0_OFFSET);
730 MLX4_PUT(inbox, param->node_guid, INIT_PORT_NODE_GUID_OFFSET);
731 MLX4_PUT(inbox, param->si_guid, INIT_PORT_SI_GUID_OFFSET);
732
733 err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
734 MLX4_CMD_TIME_CLASS_A);
735
736 mlx4_free_cmd_mailbox(dev, mailbox);
737
738 return err;
739}
740EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
741
742int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
743{
744 return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000);
745}
746EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
747
748int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
749{
750 return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000);
751}
752
753int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
754{
755 int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
756 MLX4_CMD_SET_ICM_SIZE,
757 MLX4_CMD_TIME_CLASS_A);
758 if (ret)
759 return ret;
760
761 /*
762 * Round up number of system pages needed in case
763 * MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
764 */
765 *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >>
766 (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT);
767
768 return 0;
769}
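
This is the same rounding that mlx4_QUERY_FW() applies to fw_pages: the firmware counts 4 KB ICM pages, and the driver converts that to whole system pages. A sketch assuming hypothetical 64 KB system pages:

#include <stdint.h>

#define ICM_PAGE_SHIFT	12	/* MLX4_ICM_PAGE_SIZE == 4 KB */

/* Round icm_pages up to a whole number of system pages, then convert. */
static uint64_t icm_to_sys_pages(uint64_t icm_pages, int page_shift)
{
	uint64_t per_sys = 1ULL << (page_shift - ICM_PAGE_SHIFT);

	return ((icm_pages + per_sys - 1) & ~(per_sys - 1)) >>
		(page_shift - ICM_PAGE_SHIFT);
}

/* icm_to_sys_pages(17, 16) == 2: seventeen 4 KB ICM pages fit in two
 * 64 KB system pages. */
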
770
771int mlx4_NOP(struct mlx4_dev *dev)
772{
773 /* Input modifier of 0x1f means "finish as soon as possible." */
774 return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
775}
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
new file mode 100644
index 000000000000..2616fa53d4d0
--- /dev/null
+++ b/drivers/net/mlx4/fw.h
@@ -0,0 +1,167 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#ifndef MLX4_FW_H
36#define MLX4_FW_H
37
38#include "mlx4.h"
39#include "icm.h"
40
41struct mlx4_dev_cap {
42 int max_srq_sz;
43 int max_qp_sz;
44 int reserved_qps;
45 int max_qps;
46 int reserved_srqs;
47 int max_srqs;
48 int max_cq_sz;
49 int reserved_cqs;
50 int max_cqs;
51 int max_mpts;
52 int reserved_eqs;
53 int max_eqs;
54 int reserved_mtts;
55 int max_mrw_sz;
56 int reserved_mrws;
57 int max_mtt_seg;
58 int max_requester_per_qp;
59 int max_responder_per_qp;
60 int max_rdma_global;
61 int local_ca_ack_delay;
62 int max_mtu;
63 int max_port_width;
64 int max_vl;
65 int num_ports;
66 int max_gids;
67 u16 stat_rate_support;
68 int max_pkeys;
69 u32 flags;
70 int reserved_uars;
71 int uar_size;
72 int min_page_sz;
73 int bf_reg_size;
74 int bf_regs_per_page;
75 int max_sq_sg;
76 int max_sq_desc_sz;
77 int max_rq_sg;
78 int max_rq_desc_sz;
79 int max_qp_per_mcg;
80 int reserved_mgms;
81 int max_mcgs;
82 int reserved_pds;
83 int max_pds;
84 int qpc_entry_sz;
85 int rdmarc_entry_sz;
86 int altc_entry_sz;
87 int aux_entry_sz;
88 int srq_entry_sz;
89 int cqc_entry_sz;
90 int eqc_entry_sz;
91 int dmpt_entry_sz;
92 int cmpt_entry_sz;
93 int mtt_entry_sz;
94 int resize_srq;
95 u8 bmme_flags;
96 u32 reserved_lkey;
97 u64 max_icm_sz;
98};
99
100struct mlx4_adapter {
101 u32 vendor_id;
102 u32 device_id;
103 u32 revision_id;
104 char board_id[MLX4_BOARD_ID_LEN];
105 u8 inta_pin;
106};
107
108struct mlx4_init_hca_param {
109 u64 qpc_base;
110 u64 rdmarc_base;
111 u64 auxc_base;
112 u64 altc_base;
113 u64 srqc_base;
114 u64 cqc_base;
115 u64 eqc_base;
116 u64 mc_base;
117 u64 dmpt_base;
118 u64 cmpt_base;
119 u64 mtt_base;
120 u16 log_mc_entry_sz;
121 u16 log_mc_hash_sz;
122 u8 log_num_qps;
123 u8 log_num_srqs;
124 u8 log_num_cqs;
125 u8 log_num_eqs;
126 u8 log_rd_per_qp;
127 u8 log_mc_table_sz;
128 u8 log_mpt_sz;
129 u8 log_uar_sz;
130};
131
132struct mlx4_init_ib_param {
133 int port_width;
134 int vl_cap;
135 int mtu_cap;
136 u16 gid_cap;
137 u16 pkey_cap;
138 int set_guid0;
139 u64 guid0;
140 int set_node_guid;
141 u64 node_guid;
142 int set_si_guid;
143 u64 si_guid;
144};
145
146struct mlx4_set_ib_param {
147 int set_si_guid;
148 int reset_qkey_viol;
149 u64 si_guid;
150 u32 cap_mask;
151};
152
153int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
154int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
155int mlx4_UNMAP_FA(struct mlx4_dev *dev);
156int mlx4_RUN_FW(struct mlx4_dev *dev);
157int mlx4_QUERY_FW(struct mlx4_dev *dev);
158int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
159int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
160int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
161int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
162int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
163int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
164int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
165int mlx4_NOP(struct mlx4_dev *dev);
166
167#endif /* MLX4_FW_H */
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
new file mode 100644
index 000000000000..e96feaed6ed4
--- /dev/null
+++ b/drivers/net/mlx4/icm.c
@@ -0,0 +1,379 @@
1/*
2 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/init.h>
35#include <linux/errno.h>
36
37#include <linux/mlx4/cmd.h>
38
39#include "mlx4.h"
40#include "icm.h"
41#include "fw.h"
42
43/*
44 * We allocate in chunks as big as we can get, up to a maximum of
45 * 256 KB per chunk.
46 */
47enum {
48 MLX4_ICM_ALLOC_SIZE = 1 << 18,
49 MLX4_TABLE_CHUNK_SIZE = 1 << 18
50};
51
52void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm)
53{
54 struct mlx4_icm_chunk *chunk, *tmp;
55 int i;
56
57 list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
58 if (chunk->nsg > 0)
59 pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
60 PCI_DMA_BIDIRECTIONAL);
61
62 for (i = 0; i < chunk->npages; ++i)
63 __free_pages(chunk->mem[i].page,
64 get_order(chunk->mem[i].length));
65
66 kfree(chunk);
67 }
68
69 kfree(icm);
70}
71
72struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
73 gfp_t gfp_mask)
74{
75 struct mlx4_icm *icm;
76 struct mlx4_icm_chunk *chunk = NULL;
77 int cur_order;
78
79 icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
80 if (!icm)
81 return icm;
82
83 icm->refcount = 0;
84 INIT_LIST_HEAD(&icm->chunk_list);
85
86 cur_order = get_order(MLX4_ICM_ALLOC_SIZE);
87
88 while (npages > 0) {
89 if (!chunk) {
90 chunk = kmalloc(sizeof *chunk,
91 gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
92 if (!chunk)
93 goto fail;
94
95 chunk->npages = 0;
96 chunk->nsg = 0;
97 list_add_tail(&chunk->list, &icm->chunk_list);
98 }
99
100 while (1 << cur_order > npages)
101 --cur_order;
102
103 chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
104 if (chunk->mem[chunk->npages].page) {
105 chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
106 chunk->mem[chunk->npages].offset = 0;
107
108 if (++chunk->npages == MLX4_ICM_CHUNK_LEN) {
109 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
110 chunk->npages,
111 PCI_DMA_BIDIRECTIONAL);
112
113 if (chunk->nsg <= 0)
114 goto fail;
115
116 chunk = NULL;
117 }
118
119 npages -= 1 << cur_order;
120 } else {
121 --cur_order;
122 if (cur_order < 0)
123 goto fail;
124 }
125 }
126
127 if (chunk) {
128 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
129 chunk->npages,
130 PCI_DMA_BIDIRECTIONAL);
131
132 if (chunk->nsg <= 0)
133 goto fail;
134 }
135
136 return icm;
137
138fail:
139 mlx4_free_icm(dev, icm);
140 return NULL;
141}
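
The loop above embodies a simple fallback strategy: try the largest page order first and halve it whenever the allocator fails, so the ICM is assembled from the biggest physically contiguous pieces currently available. Stripped of the scatterlist bookkeeping, the control flow is roughly this (try_alloc_order() is a hypothetical stand-in for alloc_pages()):

int try_alloc_order(int order);		/* hypothetical helper */

static int fill_icm(int npages, int max_order)
{
	int cur_order = max_order;

	while (npages > 0) {
		while ((1 << cur_order) > npages)
			--cur_order;		/* don't overshoot */
		if (try_alloc_order(cur_order))
			npages -= 1 << cur_order;
		else if (--cur_order < 0)
			return -1;		/* even order 0 failed */
	}
	return 0;
}
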
142
143static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
144{
145 return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
146}
147
148int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
149{
150 return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
151 MLX4_CMD_TIME_CLASS_B);
152}
153
154int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt)
155{
156 struct mlx4_cmd_mailbox *mailbox;
157 __be64 *inbox;
158 int err;
159
160 mailbox = mlx4_alloc_cmd_mailbox(dev);
161 if (IS_ERR(mailbox))
162 return PTR_ERR(mailbox);
163 inbox = mailbox->buf;
164
165 inbox[0] = cpu_to_be64(virt);
166 inbox[1] = cpu_to_be64(dma_addr);
167
168 err = mlx4_cmd(dev, mailbox->dma, 1, 0, MLX4_CMD_MAP_ICM,
169 MLX4_CMD_TIME_CLASS_B);
170
171 mlx4_free_cmd_mailbox(dev, mailbox);
172
173 if (!err)
174 mlx4_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
175 (unsigned long long) dma_addr, (unsigned long long) virt);
176
177 return err;
178}
179
180int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
181{
182 return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, -1);
183}
184
185int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
186{
187 return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
188}
189
190int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
191{
192 int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
193 int ret = 0;
194
195 mutex_lock(&table->mutex);
196
197 if (table->icm[i]) {
198 ++table->icm[i]->refcount;
199 goto out;
200 }
201
202 table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
203 (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
204 __GFP_NOWARN);
205 if (!table->icm[i]) {
206 ret = -ENOMEM;
207 goto out;
208 }
209
210 if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
211 (u64) i * MLX4_TABLE_CHUNK_SIZE)) {
212 mlx4_free_icm(dev, table->icm[i]);
213 table->icm[i] = NULL;
214 ret = -ENOMEM;
215 goto out;
216 }
217
218 ++table->icm[i]->refcount;
219
220out:
221 mutex_unlock(&table->mutex);
222 return ret;
223}
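
The index computation at the top of mlx4_table_get() maps an object number to the 256 KB chunk that holds it; the mask is valid because the table size is a power of two. A worked example with hypothetical sizes:

int chunk_size = 1 << 18;	/* MLX4_TABLE_CHUNK_SIZE: 256 KB */
int obj_size   = 64;		/* so 4096 objects per chunk */
int num_obj    = 1 << 16;	/* table size, a power of two */
int obj        = 10000;
int i = (obj & (num_obj - 1)) / (chunk_size / obj_size);	/* i == 2 */
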
224
225void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
226{
227 int i;
228
229 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
230
231 mutex_lock(&table->mutex);
232
233 if (--table->icm[i]->refcount == 0) {
234 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
235 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
236 mlx4_free_icm(dev, table->icm[i]);
237 table->icm[i] = NULL;
238 }
239
240 mutex_unlock(&table->mutex);
241}
242
243void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
244{
245 int idx, offset, i;
246 struct mlx4_icm_chunk *chunk;
247 struct mlx4_icm *icm;
248 struct page *page = NULL;
249
250 if (!table->lowmem)
251 return NULL;
252
253 mutex_lock(&table->mutex);
254
255 idx = obj & (table->num_obj - 1);
256 icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)];
257 offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
258
259 if (!icm)
260 goto out;
261
262 list_for_each_entry(chunk, &icm->chunk_list, list) {
263 for (i = 0; i < chunk->npages; ++i) {
264 if (chunk->mem[i].length > offset) {
265 page = chunk->mem[i].page;
266 goto out;
267 }
268 offset -= chunk->mem[i].length;
269 }
270 }
271
272out:
273 mutex_unlock(&table->mutex);
274 return page ? lowmem_page_address(page) + offset : NULL;
275}
276
277int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
278 int start, int end)
279{
280 int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
281 int i, err;
282
283 for (i = start; i <= end; i += inc) {
284 err = mlx4_table_get(dev, table, i);
285 if (err)
286 goto fail;
287 }
288
289 return 0;
290
291fail:
292 while (i > start) {
293 i -= inc;
294 mlx4_table_put(dev, table, i);
295 }
296
297 return err;
298}
299
300void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
301 int start, int end)
302{
303 int i;
304
305 for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
306 mlx4_table_put(dev, table, i);
307}
308
309int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
310 u64 virt, int obj_size, int nobj, int reserved,
311 int use_lowmem)
312{
313 int obj_per_chunk;
314 int num_icm;
315 unsigned chunk_size;
316 int i;
317
318 obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
319 num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
320
321 table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL);
322 if (!table->icm)
323 return -ENOMEM;
324 table->virt = virt;
325 table->num_icm = num_icm;
326 table->num_obj = nobj;
327 table->obj_size = obj_size;
328 table->lowmem = use_lowmem;
329 mutex_init(&table->mutex);
330
331 for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
332 chunk_size = MLX4_TABLE_CHUNK_SIZE;
333 if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size)
334 chunk_size = PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE);
335
336 table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
337 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
338 __GFP_NOWARN);
339 if (!table->icm[i])
340 goto err;
341 if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
342 mlx4_free_icm(dev, table->icm[i]);
343 table->icm[i] = NULL;
344 goto err;
345 }
346
347 /*
348 * Add a reference to this ICM chunk so that it never
349 * gets freed (since it contains reserved firmware objects).
350 */
351 ++table->icm[i]->refcount;
352 }
353
354 return 0;
355
356err:
357 for (i = 0; i < num_icm; ++i)
358 if (table->icm[i]) {
359 mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
360 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
361 mlx4_free_icm(dev, table->icm[i]);
362 }
363
364 return -ENOMEM;
365}
366
367void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
368{
369 int i;
370
371 for (i = 0; i < table->num_icm; ++i)
372 if (table->icm[i]) {
373 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
374 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
375 mlx4_free_icm(dev, table->icm[i]);
376 }
377
378 kfree(table->icm);
379}
diff --git a/drivers/net/mlx4/icm.h b/drivers/net/mlx4/icm.h
new file mode 100644
index 000000000000..bea223d879a5
--- /dev/null
+++ b/drivers/net/mlx4/icm.h
@@ -0,0 +1,135 @@
1/*
2 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef MLX4_ICM_H
35#define MLX4_ICM_H
36
37#include <linux/list.h>
38#include <linux/pci.h>
39#include <linux/mutex.h>
40
41#define MLX4_ICM_CHUNK_LEN \
42 ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
43 (sizeof (struct scatterlist)))
44
45enum {
46 MLX4_ICM_PAGE_SHIFT = 12,
47 MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT,
48};
49
50struct mlx4_icm_chunk {
51 struct list_head list;
52 int npages;
53 int nsg;
54 struct scatterlist mem[MLX4_ICM_CHUNK_LEN];
55};
56
57struct mlx4_icm {
58 struct list_head chunk_list;
59 int refcount;
60};
61
62struct mlx4_icm_iter {
63 struct mlx4_icm *icm;
64 struct mlx4_icm_chunk *chunk;
65 int page_idx;
66};
67
68struct mlx4_dev;
69
70struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask);
71void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm);
72
73int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
74void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
75int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
76 int start, int end);
77void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
78 int start, int end);
79int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
80 u64 virt, int obj_size, int nobj, int reserved,
81 int use_lowmem);
82void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
85void *mlx4_table_find(struct mlx4_icm_table *table, int obj);
90
91static inline void mlx4_icm_first(struct mlx4_icm *icm,
92 struct mlx4_icm_iter *iter)
93{
94 iter->icm = icm;
95 iter->chunk = list_empty(&icm->chunk_list) ?
96 NULL : list_entry(icm->chunk_list.next,
97 struct mlx4_icm_chunk, list);
98 iter->page_idx = 0;
99}
100
101static inline int mlx4_icm_last(struct mlx4_icm_iter *iter)
102{
103 return !iter->chunk;
104}
105
106static inline void mlx4_icm_next(struct mlx4_icm_iter *iter)
107{
108 if (++iter->page_idx >= iter->chunk->nsg) {
109 if (iter->chunk->list.next == &iter->icm->chunk_list) {
110 iter->chunk = NULL;
111 return;
112 }
113
114 iter->chunk = list_entry(iter->chunk->list.next,
115 struct mlx4_icm_chunk, list);
116 iter->page_idx = 0;
117 }
118}
119
120static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter)
121{
122 return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
123}
124
125static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
126{
127 return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
128}
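
The inline helpers above form a forward iterator over the DMA-mapped regions of an ICM allocation; the canonical loop is the one mlx4_map_cmd() uses. A usage sketch (handle_region() is hypothetical):

void handle_region(dma_addr_t addr, unsigned long len);	/* hypothetical */

static void walk_icm(struct mlx4_icm *icm)
{
	struct mlx4_icm_iter iter;

	for (mlx4_icm_first(icm, &iter);
	     !mlx4_icm_last(&iter);
	     mlx4_icm_next(&iter))
		handle_region(mlx4_icm_addr(&iter), mlx4_icm_size(&iter));
}
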
129
130int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count);
131int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt);
132int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
133int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
134
135#endif /* MLX4_ICM_H */
diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c
new file mode 100644
index 000000000000..65854f9e9c76
--- /dev/null
+++ b/drivers/net/mlx4/intf.c
@@ -0,0 +1,165 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx4/driver.h>
34
35#include "mlx4.h"
36
37struct mlx4_device_context {
38 struct list_head list;
39 struct mlx4_interface *intf;
40 void *context;
41};
42
43static LIST_HEAD(intf_list);
44static LIST_HEAD(dev_list);
45static DEFINE_MUTEX(intf_mutex);
46
47static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
48{
49 struct mlx4_device_context *dev_ctx;
50
51 dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL);
52 if (!dev_ctx)
53 return;
54
55 dev_ctx->intf = intf;
56 dev_ctx->context = intf->add(&priv->dev);
57
58 if (dev_ctx->context) {
59 spin_lock_irq(&priv->ctx_lock);
60 list_add_tail(&dev_ctx->list, &priv->ctx_list);
61 spin_unlock_irq(&priv->ctx_lock);
62 } else
63 kfree(dev_ctx);
64}
65
66static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
67{
68 struct mlx4_device_context *dev_ctx;
69
70 list_for_each_entry(dev_ctx, &priv->ctx_list, list)
71 if (dev_ctx->intf == intf) {
72 spin_lock_irq(&priv->ctx_lock);
73 list_del(&dev_ctx->list);
74 spin_unlock_irq(&priv->ctx_lock);
75
76 intf->remove(&priv->dev, dev_ctx->context);
77 kfree(dev_ctx);
78 return;
79 }
80}
81
82int mlx4_register_interface(struct mlx4_interface *intf)
83{
84 struct mlx4_priv *priv;
85
86 if (!intf->add || !intf->remove)
87 return -EINVAL;
88
89 mutex_lock(&intf_mutex);
90
91 list_add_tail(&intf->list, &intf_list);
92 list_for_each_entry(priv, &dev_list, dev_list)
93 mlx4_add_device(intf, priv);
94
95 mutex_unlock(&intf_mutex);
96
97 return 0;
98}
99EXPORT_SYMBOL_GPL(mlx4_register_interface);
100
101void mlx4_unregister_interface(struct mlx4_interface *intf)
102{
103 struct mlx4_priv *priv;
104
105 mutex_lock(&intf_mutex);
106
107 list_for_each_entry(priv, &dev_list, dev_list)
108 mlx4_remove_device(intf, priv);
109
110 list_del(&intf->list);
111
112 mutex_unlock(&intf_mutex);
113}
114EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
115
116void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
117 int subtype, int port)
118{
119 struct mlx4_priv *priv = mlx4_priv(dev);
120 struct mlx4_device_context *dev_ctx;
121 unsigned long flags;
122
123 spin_lock_irqsave(&priv->ctx_lock, flags);
124
125 list_for_each_entry(dev_ctx, &priv->ctx_list, list)
126 if (dev_ctx->intf->event)
127 dev_ctx->intf->event(dev, dev_ctx->context, type,
128 subtype, port);
129
130 spin_unlock_irqrestore(&priv->ctx_lock, flags);
131}
132
133int mlx4_register_device(struct mlx4_dev *dev)
134{
135 struct mlx4_priv *priv = mlx4_priv(dev);
136 struct mlx4_interface *intf;
137
138 INIT_LIST_HEAD(&priv->ctx_list);
139 spin_lock_init(&priv->ctx_lock);
140
141 mutex_lock(&intf_mutex);
142
143 list_add_tail(&priv->dev_list, &dev_list);
144 list_for_each_entry(intf, &intf_list, list)
145 mlx4_add_device(intf, priv);
146
147 mutex_unlock(&intf_mutex);
148
149 return 0;
150}
151
152void mlx4_unregister_device(struct mlx4_dev *dev)
153{
154 struct mlx4_priv *priv = mlx4_priv(dev);
155 struct mlx4_interface *intf;
156
157 mutex_lock(&intf_mutex);
158
159 list_for_each_entry(intf, &intf_list, list)
160 mlx4_remove_device(intf, priv);
161
162 list_del(&priv->dev_list);
163
164 mutex_unlock(&intf_mutex);
165}
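
The registration scheme above is deliberately symmetric: registering an interface attaches it to every device already present, and registering a device attaches every interface already registered, so module load order never matters. A minimal hypothetical consumer:

/* .add is called once per ConnectX device, whether the device existed
 * before mlx4_register_interface() or shows up afterwards. */
static void *demo_add(struct mlx4_dev *dev)
{
	return dev;			/* any non-NULL context */
}

static void demo_remove(struct mlx4_dev *dev, void *context)
{
}

static struct mlx4_interface demo_intf = {
	.add	= demo_add,
	.remove	= demo_remove,
};

/* Call mlx4_register_interface(&demo_intf) from module init and
 * mlx4_unregister_interface(&demo_intf) from module exit. */
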
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
new file mode 100644
index 000000000000..4debb024eaf9
--- /dev/null
+++ b/drivers/net/mlx4/main.c
@@ -0,0 +1,936 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/module.h>
37#include <linux/init.h>
38#include <linux/errno.h>
39#include <linux/pci.h>
40#include <linux/dma-mapping.h>
41
42#include <linux/mlx4/device.h>
43#include <linux/mlx4/doorbell.h>
44
45#include "mlx4.h"
46#include "fw.h"
47#include "icm.h"
48
49MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
51MODULE_LICENSE("Dual BSD/GPL");
52MODULE_VERSION(DRV_VERSION);
53
54#ifdef CONFIG_MLX4_DEBUG
55
56int mlx4_debug_level = 0;
57module_param_named(debug_level, mlx4_debug_level, int, 0644);
58MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
59
60#endif /* CONFIG_MLX4_DEBUG */
61
62#ifdef CONFIG_PCI_MSI
63
64static int msi_x;
65module_param(msi_x, int, 0444);
66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
67
68#else /* CONFIG_PCI_MSI */
69
70#define msi_x (0)
71
72#endif /* CONFIG_PCI_MSI */
73
74static const char mlx4_version[] __devinitdata =
75 DRV_NAME ": Mellanox ConnectX core driver v"
76 DRV_VERSION " (" DRV_RELDATE ")\n";
77
78static struct mlx4_profile default_profile = {
79 .num_qp = 1 << 16,
80 .num_srq = 1 << 16,
81 .rdmarc_per_qp = 4,
82 .num_cq = 1 << 16,
83 .num_mcg = 1 << 13,
84 .num_mpt = 1 << 17,
85 .num_mtt = 1 << 20,
86};
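/*
 * For scale: this requests 64K QPs, SRQs and CQs (1 << 16), 8K multicast
 * groups (1 << 13), 128K memory regions (1 << 17), 1M MTT entries
 * (1 << 20) and 4 RDMA responder contexts per QP.  mlx4_make_profile()
 * translates these requests into the ICM table sizes mapped below.
 */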
87
88static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
89{
90 int err;
91
92 err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
93 if (err) {
94 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
95 return err;
96 }
97
98 if (dev_cap->min_page_sz > PAGE_SIZE) {
99 mlx4_err(dev, "HCA minimum page size of %d bigger than "
100			 "kernel PAGE_SIZE of %lu, aborting.\n",
101 dev_cap->min_page_sz, PAGE_SIZE);
102 return -ENODEV;
103 }
104 if (dev_cap->num_ports > MLX4_MAX_PORTS) {
105 mlx4_err(dev, "HCA has %d ports, but we only support %d, "
106 "aborting.\n",
107 dev_cap->num_ports, MLX4_MAX_PORTS);
108 return -ENODEV;
109 }
110
111 if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
112 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
113 "PCI resource 2 size of 0x%llx, aborting.\n",
114 dev_cap->uar_size,
115 (unsigned long long) pci_resource_len(dev->pdev, 2));
116 return -ENODEV;
117 }
118
119 dev->caps.num_ports = dev_cap->num_ports;
120 dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
121 dev->caps.vl_cap = dev_cap->max_vl;
122 dev->caps.mtu_cap = dev_cap->max_mtu;
123 dev->caps.gid_table_len = dev_cap->max_gids;
124 dev->caps.pkey_table_len = dev_cap->max_pkeys;
125 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
126 dev->caps.bf_reg_size = dev_cap->bf_reg_size;
127 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page;
128 dev->caps.max_sq_sg = dev_cap->max_sq_sg;
129 dev->caps.max_rq_sg = dev_cap->max_rq_sg;
130 dev->caps.max_wqes = dev_cap->max_qp_sz;
131 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp;
132 dev->caps.reserved_qps = dev_cap->reserved_qps;
133 dev->caps.max_srq_wqes = dev_cap->max_srq_sz;
134 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1;
135 dev->caps.reserved_srqs = dev_cap->reserved_srqs;
136 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
137 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
138 dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM;
139 /*
140 * Subtract 1 from the limit because we need to allocate a
141 * spare CQE so the HCA HW can tell the difference between an
142 * empty CQ and a full CQ.
143 */
144 dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
145 dev->caps.reserved_cqs = dev_cap->reserved_cqs;
146 dev->caps.reserved_eqs = dev_cap->reserved_eqs;
147 dev->caps.reserved_mtts = dev_cap->reserved_mtts;
148 dev->caps.reserved_mrws = dev_cap->reserved_mrws;
149 dev->caps.reserved_uars = dev_cap->reserved_uars;
150 dev->caps.reserved_pds = dev_cap->reserved_pds;
151 dev->caps.port_width_cap = dev_cap->max_port_width;
152 dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
153 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
154 dev->caps.flags = dev_cap->flags;
155 dev->caps.stat_rate_support = dev_cap->stat_rate_support;
156
157 return 0;
158}
159
160static int __devinit mlx4_load_fw(struct mlx4_dev *dev)
161{
162 struct mlx4_priv *priv = mlx4_priv(dev);
163 int err;
164
165 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
166 GFP_HIGHUSER | __GFP_NOWARN);
167 if (!priv->fw.fw_icm) {
168 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
169 return -ENOMEM;
170 }
171
172 err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
173 if (err) {
174 mlx4_err(dev, "MAP_FA command failed, aborting.\n");
175 goto err_free;
176 }
177
178 err = mlx4_RUN_FW(dev);
179 if (err) {
180 mlx4_err(dev, "RUN_FW command failed, aborting.\n");
181 goto err_unmap_fa;
182 }
183
184 return 0;
185
186err_unmap_fa:
187 mlx4_UNMAP_FA(dev);
188
189err_free:
190 mlx4_free_icm(dev, priv->fw.fw_icm);
191 return err;
192}
193
194static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
195 int cmpt_entry_sz)
196{
197 struct mlx4_priv *priv = mlx4_priv(dev);
198 int err;
199
200 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
201 cmpt_base +
202 ((u64) (MLX4_CMPT_TYPE_QP *
203 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
204 cmpt_entry_sz, dev->caps.num_qps,
205 dev->caps.reserved_qps, 0);
206 if (err)
207 goto err;
208
209 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
210 cmpt_base +
211 ((u64) (MLX4_CMPT_TYPE_SRQ *
212 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
213 cmpt_entry_sz, dev->caps.num_srqs,
214 dev->caps.reserved_srqs, 0);
215 if (err)
216 goto err_qp;
217
218 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
219 cmpt_base +
220 ((u64) (MLX4_CMPT_TYPE_CQ *
221 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
222 cmpt_entry_sz, dev->caps.num_cqs,
223 dev->caps.reserved_cqs, 0);
224 if (err)
225 goto err_srq;
226
227 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
228 cmpt_base +
229 ((u64) (MLX4_CMPT_TYPE_EQ *
230 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
231 cmpt_entry_sz,
232 roundup_pow_of_two(MLX4_NUM_EQ +
233 dev->caps.reserved_eqs),
234 MLX4_NUM_EQ + dev->caps.reserved_eqs, 0);
235 if (err)
236 goto err_cq;
237
238 return 0;
239
240err_cq:
241 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
242
243err_srq:
244 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
245
246err_qp:
247 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
248
249err:
250 return err;
251}
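/*
 * Layout recap: each object class gets its own slice of the cMPT area.
 * With MLX4_CMPT_SHIFT = 24 (mlx4.h), class c starts at
 * cmpt_base + ((c * cmpt_entry_sz) << 24), so every class is given
 * address space for up to 2^24 (16M) entries even though only the used
 * portions are backed by ICM.
 */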
252
253static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
254 struct mlx4_dev_cap *dev_cap,
255 struct mlx4_init_hca_param *init_hca,
256 u64 icm_size)
257{
258 struct mlx4_priv *priv = mlx4_priv(dev);
259 u64 aux_pages;
260 int err;
261
262 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
263 if (err) {
264 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
265 return err;
266 }
267
268 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
269 (unsigned long long) icm_size >> 10,
270 (unsigned long long) aux_pages << 2);
271
272 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
273 GFP_HIGHUSER | __GFP_NOWARN);
274 if (!priv->fw.aux_icm) {
275 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
276 return -ENOMEM;
277 }
278
279 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
280 if (err) {
281 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
282 goto err_free_aux;
283 }
284
285 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
286 if (err) {
287 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
288 goto err_unmap_aux;
289 }
290
291 err = mlx4_map_eq_icm(dev, init_hca->eqc_base);
292 if (err) {
293 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
294 goto err_unmap_cmpt;
295 }
296
297 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
298 init_hca->mtt_base,
299 dev->caps.mtt_entry_sz,
300 dev->caps.num_mtt_segs,
301 dev->caps.reserved_mtts, 1);
302 if (err) {
303 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
304 goto err_unmap_eq;
305 }
306
307 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
308 init_hca->dmpt_base,
309 dev_cap->dmpt_entry_sz,
310 dev->caps.num_mpts,
311 dev->caps.reserved_mrws, 1);
312 if (err) {
313 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
314 goto err_unmap_mtt;
315 }
316
317 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
318 init_hca->qpc_base,
319 dev_cap->qpc_entry_sz,
320 dev->caps.num_qps,
321 dev->caps.reserved_qps, 0);
322 if (err) {
323 mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
324 goto err_unmap_dmpt;
325 }
326
327 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
328 init_hca->auxc_base,
329 dev_cap->aux_entry_sz,
330 dev->caps.num_qps,
331 dev->caps.reserved_qps, 0);
332 if (err) {
333 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
334 goto err_unmap_qp;
335 }
336
337 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
338 init_hca->altc_base,
339 dev_cap->altc_entry_sz,
340 dev->caps.num_qps,
341 dev->caps.reserved_qps, 0);
342 if (err) {
343 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
344 goto err_unmap_auxc;
345 }
346
347 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
348 init_hca->rdmarc_base,
349 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
350 dev->caps.num_qps,
351 dev->caps.reserved_qps, 0);
352 if (err) {
353		mlx4_err(dev, "Failed to map RDMARC context memory, aborting.\n");
354 goto err_unmap_altc;
355 }
356
357 err = mlx4_init_icm_table(dev, &priv->cq_table.table,
358 init_hca->cqc_base,
359 dev_cap->cqc_entry_sz,
360 dev->caps.num_cqs,
361 dev->caps.reserved_cqs, 0);
362 if (err) {
363 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
364 goto err_unmap_rdmarc;
365 }
366
367 err = mlx4_init_icm_table(dev, &priv->srq_table.table,
368 init_hca->srqc_base,
369 dev_cap->srq_entry_sz,
370 dev->caps.num_srqs,
371 dev->caps.reserved_srqs, 0);
372 if (err) {
373 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
374 goto err_unmap_cq;
375 }
376
377 /*
378 * It's not strictly required, but for simplicity just map the
379 * whole multicast group table now. The table isn't very big
380 * and it's a lot easier than trying to track ref counts.
381 */
382 err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
383 init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
384 dev->caps.num_mgms + dev->caps.num_amgms,
385 dev->caps.num_mgms + dev->caps.num_amgms,
386 0);
387 if (err) {
388 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
389 goto err_unmap_srq;
390 }
391
392 return 0;
393
394err_unmap_srq:
395 mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
396
397err_unmap_cq:
398 mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
399
400err_unmap_rdmarc:
401 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
402
403err_unmap_altc:
404 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
405
406err_unmap_auxc:
407 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
408
409err_unmap_qp:
410 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
411
412err_unmap_dmpt:
413 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
414
415err_unmap_mtt:
416 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
417
418err_unmap_eq:
419 mlx4_unmap_eq_icm(dev);
420
421err_unmap_cmpt:
422 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
423 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
424 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
425 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
426
427err_unmap_aux:
428 mlx4_UNMAP_ICM_AUX(dev);
429
430err_free_aux:
431 mlx4_free_icm(dev, priv->fw.aux_icm);
432
433 return err;
434}
435
436static void mlx4_free_icms(struct mlx4_dev *dev)
437{
438 struct mlx4_priv *priv = mlx4_priv(dev);
439
440 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
441 mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
442 mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
443 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
444 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
445 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
446 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
447 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
448 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
449 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
450 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
451 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
452 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
453 mlx4_unmap_eq_icm(dev);
454
455 mlx4_UNMAP_ICM_AUX(dev);
456 mlx4_free_icm(dev, priv->fw.aux_icm);
457}
458
459static void mlx4_close_hca(struct mlx4_dev *dev)
460{
461 mlx4_CLOSE_HCA(dev, 0);
462 mlx4_free_icms(dev);
463 mlx4_UNMAP_FA(dev);
464 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm);
465}
466
467static int __devinit mlx4_init_hca(struct mlx4_dev *dev)
468{
469 struct mlx4_priv *priv = mlx4_priv(dev);
470 struct mlx4_adapter adapter;
471 struct mlx4_dev_cap dev_cap;
472 struct mlx4_profile profile;
473 struct mlx4_init_hca_param init_hca;
474 u64 icm_size;
475 int err;
476
477 err = mlx4_QUERY_FW(dev);
478 if (err) {
479 mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
480 return err;
481 }
482
483 err = mlx4_load_fw(dev);
484 if (err) {
485 mlx4_err(dev, "Failed to start FW, aborting.\n");
486 return err;
487 }
488
489 err = mlx4_dev_cap(dev, &dev_cap);
490 if (err) {
491 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
492 goto err_stop_fw;
493 }
494
495 profile = default_profile;
496
497 icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
498 if ((long long) icm_size < 0) {
499 err = icm_size;
500 goto err_stop_fw;
501 }
502
503 init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
504
505 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
506 if (err)
507 goto err_stop_fw;
508
509 err = mlx4_INIT_HCA(dev, &init_hca);
510 if (err) {
511 mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
512 goto err_free_icm;
513 }
514
515 err = mlx4_QUERY_ADAPTER(dev, &adapter);
516 if (err) {
517 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
518 goto err_close;
519 }
520
521 priv->eq_table.inta_pin = adapter.inta_pin;
522 priv->rev_id = adapter.revision_id;
523 memcpy(priv->board_id, adapter.board_id, sizeof priv->board_id);
524
525 return 0;
526
527err_close:
528	mlx4_CLOSE_HCA(dev, 0);	/* command only; calling mlx4_close_hca() here would free the ICM again in the labels below */
529
530err_free_icm:
531 mlx4_free_icms(dev);
532
533err_stop_fw:
534 mlx4_UNMAP_FA(dev);
535 mlx4_free_icm(dev, priv->fw.fw_icm);
536
537 return err;
538}
539
540static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
541{
542 struct mlx4_priv *priv = mlx4_priv(dev);
543 int err;
544
545 MLX4_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
546
547 err = mlx4_init_uar_table(dev);
548 if (err) {
549 mlx4_err(dev, "Failed to initialize "
550 "user access region table, aborting.\n");
551 return err;
552 }
553
554 err = mlx4_uar_alloc(dev, &priv->driver_uar);
555 if (err) {
556 mlx4_err(dev, "Failed to allocate driver access region, "
557 "aborting.\n");
558 goto err_uar_table_free;
559 }
560
561 priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
562 if (!priv->kar) {
563 mlx4_err(dev, "Couldn't map kernel access region, "
564 "aborting.\n");
565 err = -ENOMEM;
566 goto err_uar_free;
567 }
568
569 err = mlx4_init_pd_table(dev);
570 if (err) {
571 mlx4_err(dev, "Failed to initialize "
572 "protection domain table, aborting.\n");
573 goto err_kar_unmap;
574 }
575
576 err = mlx4_init_mr_table(dev);
577 if (err) {
578 mlx4_err(dev, "Failed to initialize "
579 "memory region table, aborting.\n");
580 goto err_pd_table_free;
581 }
582
583 mlx4_map_catas_buf(dev);
584
585 err = mlx4_init_eq_table(dev);
586 if (err) {
587 mlx4_err(dev, "Failed to initialize "
588 "event queue table, aborting.\n");
589 goto err_catas_buf;
590 }
591
592 err = mlx4_cmd_use_events(dev);
593 if (err) {
594 mlx4_err(dev, "Failed to switch to event-driven "
595 "firmware commands, aborting.\n");
596 goto err_eq_table_free;
597 }
598
599 err = mlx4_NOP(dev);
600 if (err) {
601 mlx4_err(dev, "NOP command failed to generate interrupt "
602 "(IRQ %d), aborting.\n",
603 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
604 if (dev->flags & MLX4_FLAG_MSI_X)
605 mlx4_err(dev, "Try again with MSI-X disabled.\n");
606 else
607 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
608
609 goto err_cmd_poll;
610 }
611
612 mlx4_dbg(dev, "NOP command IRQ test passed\n");
613
614 err = mlx4_init_cq_table(dev);
615 if (err) {
616 mlx4_err(dev, "Failed to initialize "
617 "completion queue table, aborting.\n");
618 goto err_cmd_poll;
619 }
620
621 err = mlx4_init_srq_table(dev);
622 if (err) {
623 mlx4_err(dev, "Failed to initialize "
624 "shared receive queue table, aborting.\n");
625 goto err_cq_table_free;
626 }
627
628 err = mlx4_init_qp_table(dev);
629 if (err) {
630 mlx4_err(dev, "Failed to initialize "
631 "queue pair table, aborting.\n");
632 goto err_srq_table_free;
633 }
634
635 err = mlx4_init_mcg_table(dev);
636 if (err) {
637 mlx4_err(dev, "Failed to initialize "
638 "multicast group table, aborting.\n");
639 goto err_qp_table_free;
640 }
641
642 return 0;
643
644err_qp_table_free:
645 mlx4_cleanup_qp_table(dev);
646
647err_srq_table_free:
648 mlx4_cleanup_srq_table(dev);
649
650err_cq_table_free:
651 mlx4_cleanup_cq_table(dev);
652
653err_cmd_poll:
654 mlx4_cmd_use_polling(dev);
655
656err_eq_table_free:
657 mlx4_cleanup_eq_table(dev);
658
659err_catas_buf:
660 mlx4_unmap_catas_buf(dev);
661 mlx4_cleanup_mr_table(dev);
662
663err_pd_table_free:
664 mlx4_cleanup_pd_table(dev);
665
666err_kar_unmap:
667 iounmap(priv->kar);
668
669err_uar_free:
670 mlx4_uar_free(dev, &priv->driver_uar);
671
672err_uar_table_free:
673 mlx4_cleanup_uar_table(dev);
674 return err;
675}
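/*
 * Ordering recap: commands start out polled, so the EQ table must exist
 * before mlx4_cmd_use_events() can switch them to interrupt-driven
 * completion.  The NOP command is a deliberate self-test: it completes
 * only if the async EQ's interrupt is really delivered, catching broken
 * MSI-X or IRQ routing before the driver starts relying on interrupts.
 */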
676
677static void __devinit mlx4_enable_msi_x(struct mlx4_dev *dev)
678{
679 struct mlx4_priv *priv = mlx4_priv(dev);
680 struct msix_entry entries[MLX4_NUM_EQ];
681 int err;
682 int i;
683
684 if (msi_x) {
685 for (i = 0; i < MLX4_NUM_EQ; ++i)
686 entries[i].entry = i;
687
688 err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
689 if (err) {
690 if (err > 0)
691 mlx4_info(dev, "Only %d MSI-X vectors available, "
692 "not using MSI-X\n", err);
693 goto no_msi;
694 }
695
696 for (i = 0; i < MLX4_NUM_EQ; ++i)
697 priv->eq_table.eq[i].irq = entries[i].vector;
698
699 dev->flags |= MLX4_FLAG_MSI_X;
700 return;
701 }
702
703no_msi:
704 for (i = 0; i < MLX4_NUM_EQ; ++i)
705 priv->eq_table.eq[i].irq = dev->pdev->irq;
706}
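/*
 * pci_enable_msix() returns 0 on success, a negative errno on failure,
 * and a positive count of available vectors when the request cannot be
 * met in full.  This driver needs one vector per EQ (MLX4_NUM_EQ) and
 * does not retry with fewer; any shortfall means all EQs share the
 * legacy INTx line instead.
 */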
707
708static int __devinit mlx4_init_one(struct pci_dev *pdev,
709 const struct pci_device_id *id)
710{
711 static int mlx4_version_printed;
712 struct mlx4_priv *priv;
713 struct mlx4_dev *dev;
714 int err;
715
716 if (!mlx4_version_printed) {
717 printk(KERN_INFO "%s", mlx4_version);
718 ++mlx4_version_printed;
719 }
720
721 printk(KERN_INFO PFX "Initializing %s\n",
722 pci_name(pdev));
723
724 err = pci_enable_device(pdev);
725 if (err) {
726 dev_err(&pdev->dev, "Cannot enable PCI device, "
727 "aborting.\n");
728 return err;
729 }
730
731 /*
732 * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not
733 * be present)
734 */
735 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
736 pci_resource_len(pdev, 0) != 1 << 20) {
737 dev_err(&pdev->dev, "Missing DCS, aborting.\n");
738 err = -ENODEV;
739 goto err_disable_pdev;
740 }
741 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
742 dev_err(&pdev->dev, "Missing UAR, aborting.\n");
743 err = -ENODEV;
744 goto err_disable_pdev;
745 }
746
747 err = pci_request_region(pdev, 0, DRV_NAME);
748 if (err) {
749 dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
750 goto err_disable_pdev;
751 }
752
753 err = pci_request_region(pdev, 2, DRV_NAME);
754 if (err) {
755 dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
756 goto err_release_bar0;
757 }
758
759 pci_set_master(pdev);
760
761 err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
762 if (err) {
763 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
764 err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
765 if (err) {
766 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
767 goto err_release_bar2;
768 }
769 }
770 err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
771 if (err) {
772 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
773 "consistent PCI DMA mask.\n");
774 err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
775 if (err) {
776 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
777 "aborting.\n");
778 goto err_release_bar2;
779 }
780 }
781
782 priv = kzalloc(sizeof *priv, GFP_KERNEL);
783 if (!priv) {
784 dev_err(&pdev->dev, "Device struct alloc failed, "
785 "aborting.\n");
786 err = -ENOMEM;
787 goto err_release_bar2;
788 }
789
790 dev = &priv->dev;
791 dev->pdev = pdev;
792
793 /*
794 * Now reset the HCA before we touch the PCI capabilities or
795 * attempt a firmware command, since a boot ROM may have left
796 * the HCA in an undefined state.
797 */
798 err = mlx4_reset(dev);
799 if (err) {
800 mlx4_err(dev, "Failed to reset HCA, aborting.\n");
801 goto err_free_dev;
802 }
803
804 mlx4_enable_msi_x(dev);
805
806	err = mlx4_cmd_init(dev);
807	if (err) {
808		mlx4_err(dev, "Failed to init command interface, aborting.\n");
809		goto err_free_dev;
810	}

811 err = mlx4_init_hca(dev);
812 if (err)
813 goto err_cmd;
814
815 err = mlx4_setup_hca(dev);
816 if (err)
817 goto err_close;
818
819 err = mlx4_register_device(dev);
820 if (err)
821 goto err_cleanup;
822
823 pci_set_drvdata(pdev, dev);
824
825 return 0;
826
827err_cleanup:
828 mlx4_cleanup_mcg_table(dev);
829 mlx4_cleanup_qp_table(dev);
830 mlx4_cleanup_srq_table(dev);
831 mlx4_cleanup_cq_table(dev);
832 mlx4_cmd_use_polling(dev);
833 mlx4_cleanup_eq_table(dev);
834
835 mlx4_unmap_catas_buf(dev);
836
837 mlx4_cleanup_mr_table(dev);
838 mlx4_cleanup_pd_table(dev);
839 mlx4_cleanup_uar_table(dev);
840
841err_close:
842 mlx4_close_hca(dev);
843
844err_cmd:
845 mlx4_cmd_cleanup(dev);
846
847err_free_dev:
848 if (dev->flags & MLX4_FLAG_MSI_X)
849 pci_disable_msix(pdev);
850
851 kfree(priv);
852
853err_release_bar2:
854 pci_release_region(pdev, 2);
855
856err_release_bar0:
857 pci_release_region(pdev, 0);
858
859err_disable_pdev:
860 pci_disable_device(pdev);
861 pci_set_drvdata(pdev, NULL);
862 return err;
863}
864
865static void __devexit mlx4_remove_one(struct pci_dev *pdev)
866{
867 struct mlx4_dev *dev = pci_get_drvdata(pdev);
868 struct mlx4_priv *priv = mlx4_priv(dev);
869 int p;
870
871 if (dev) {
872 mlx4_unregister_device(dev);
873
874 for (p = 1; p <= dev->caps.num_ports; ++p)
875 mlx4_CLOSE_PORT(dev, p);
876
877 mlx4_cleanup_mcg_table(dev);
878 mlx4_cleanup_qp_table(dev);
879 mlx4_cleanup_srq_table(dev);
880 mlx4_cleanup_cq_table(dev);
881 mlx4_cmd_use_polling(dev);
882 mlx4_cleanup_eq_table(dev);
883
884 mlx4_unmap_catas_buf(dev);
885
886 mlx4_cleanup_mr_table(dev);
887 mlx4_cleanup_pd_table(dev);
888
889 iounmap(priv->kar);
890 mlx4_uar_free(dev, &priv->driver_uar);
891 mlx4_cleanup_uar_table(dev);
892 mlx4_close_hca(dev);
893 mlx4_cmd_cleanup(dev);
894
895 if (dev->flags & MLX4_FLAG_MSI_X)
896 pci_disable_msix(pdev);
897
898 kfree(priv);
899 pci_release_region(pdev, 2);
900 pci_release_region(pdev, 0);
901 pci_disable_device(pdev);
902 pci_set_drvdata(pdev, NULL);
903 }
904}
905
906static struct pci_device_id mlx4_pci_table[] = {
907 { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
908 { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
909 { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
910 { 0, }
911};
912
913MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
914
915static struct pci_driver mlx4_driver = {
916 .name = DRV_NAME,
917 .id_table = mlx4_pci_table,
918 .probe = mlx4_init_one,
919 .remove = __devexit_p(mlx4_remove_one)
920};
921
922static int __init mlx4_init(void)
923{
924 int ret;
925
926 ret = pci_register_driver(&mlx4_driver);
927 return ret < 0 ? ret : 0;
928}
929
930static void __exit mlx4_cleanup(void)
931{
932 pci_unregister_driver(&mlx4_driver);
933}
934
935module_init(mlx4_init);
936module_exit(mlx4_cleanup);
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
new file mode 100644
index 000000000000..672024a0ee71
--- /dev/null
+++ b/drivers/net/mlx4/mcg.c
@@ -0,0 +1,380 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/init.h>
34#include <linux/string.h>
35#include <linux/slab.h>
36
37#include <linux/mlx4/cmd.h>
38
39#include "mlx4.h"
40
41struct mlx4_mgm {
42 __be32 next_gid_index;
43 __be32 members_count;
44 u32 reserved[2];
45 u8 gid[16];
46 __be32 qp[MLX4_QP_PER_MGM];
47};
48
49static const u8 zero_gid[16]; /* automatically initialized to 0 */
50
51static int mlx4_READ_MCG(struct mlx4_dev *dev, int index,
52 struct mlx4_cmd_mailbox *mailbox)
53{
54 return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
55 MLX4_CMD_TIME_CLASS_A);
56}
57
58static int mlx4_WRITE_MCG(struct mlx4_dev *dev, int index,
59 struct mlx4_cmd_mailbox *mailbox)
60{
61 return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
62 MLX4_CMD_TIME_CLASS_A);
63}
64
65static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
66 u16 *hash)
67{
68 u64 imm;
69 int err;
70
71 err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, 0, MLX4_CMD_MGID_HASH,
72 MLX4_CMD_TIME_CLASS_A);
73
74 if (!err)
75 *hash = imm;
76
77 return err;
78}
79
80/*
81 * Caller must hold the MCG table mutex.  gid and mgm parameters must
82 * be properly aligned for the command interface.
83 *
84 * Returns 0 unless a firmware command error occurs.
85 *
86 * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
87 * and *mgm holds the MGM entry.
88 *
89 * If GID is found in AMGM, *index = index in AMGM, *prev = index of
90 * the previous entry in the hash chain and *mgm holds the AMGM entry.
91 *
92 * If no AMGM exists for the given GID, *index = -1, *prev = index of
93 * the last entry in the hash chain and *mgm holds the end of the chain.
94 */
95static int find_mgm(struct mlx4_dev *dev,
96 u8 *gid, struct mlx4_cmd_mailbox *mgm_mailbox,
97 u16 *hash, int *prev, int *index)
98{
99 struct mlx4_cmd_mailbox *mailbox;
100 struct mlx4_mgm *mgm = mgm_mailbox->buf;
101 u8 *mgid;
102 int err;
103
104 mailbox = mlx4_alloc_cmd_mailbox(dev);
105 if (IS_ERR(mailbox))
106 return -ENOMEM;
107 mgid = mailbox->buf;
108
109 memcpy(mgid, gid, 16);
110
111 err = mlx4_MGID_HASH(dev, mailbox, hash);
112 mlx4_free_cmd_mailbox(dev, mailbox);
113 if (err)
114 return err;
115
116 if (0)
117 mlx4_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
118 "%04x:%04x:%04x:%04x is %04x\n",
119 be16_to_cpu(((__be16 *) gid)[0]),
120 be16_to_cpu(((__be16 *) gid)[1]),
121 be16_to_cpu(((__be16 *) gid)[2]),
122 be16_to_cpu(((__be16 *) gid)[3]),
123 be16_to_cpu(((__be16 *) gid)[4]),
124 be16_to_cpu(((__be16 *) gid)[5]),
125 be16_to_cpu(((__be16 *) gid)[6]),
126 be16_to_cpu(((__be16 *) gid)[7]),
127 *hash);
128
129 *index = *hash;
130 *prev = -1;
131
132 do {
133 err = mlx4_READ_MCG(dev, *index, mgm_mailbox);
134 if (err)
135 return err;
136
137 if (!memcmp(mgm->gid, zero_gid, 16)) {
138 if (*index != *hash) {
139 mlx4_err(dev, "Found zero MGID in AMGM.\n");
140 err = -EINVAL;
141 }
142 return err;
143 }
144
145 if (!memcmp(mgm->gid, gid, 16))
146 return err;
147
148 *prev = *index;
149 *index = be32_to_cpu(mgm->next_gid_index) >> 6;
150 } while (*index);
151
152 *index = -1;
153 return err;
154}
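/*
 * Chain encoding: next_gid_index stores the next AMGM index in the bits
 * above the low 6 (written as index << 6, read back with >> 6), which is
 * why those shifts recur throughout this file.  A chain starts at the
 * MGM hash bucket (*index == *hash) and follows AMGM entries until
 * next_gid_index yields 0.
 */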
155
156int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
157{
158 struct mlx4_priv *priv = mlx4_priv(dev);
159 struct mlx4_cmd_mailbox *mailbox;
160 struct mlx4_mgm *mgm;
161 u32 members_count;
162 u16 hash;
163 int index, prev;
164 int link = 0;
165 int i;
166 int err;
167
168 mailbox = mlx4_alloc_cmd_mailbox(dev);
169 if (IS_ERR(mailbox))
170 return PTR_ERR(mailbox);
171 mgm = mailbox->buf;
172
173 mutex_lock(&priv->mcg_table.mutex);
174
175 err = find_mgm(dev, gid, mailbox, &hash, &prev, &index);
176 if (err)
177 goto out;
178
179 if (index != -1) {
180 if (!memcmp(mgm->gid, zero_gid, 16))
181 memcpy(mgm->gid, gid, 16);
182 } else {
183 link = 1;
184
185 index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap);
186 if (index == -1) {
187 mlx4_err(dev, "No AMGM entries left\n");
188 err = -ENOMEM;
189 goto out;
190 }
191 index += dev->caps.num_mgms;
192
193 err = mlx4_READ_MCG(dev, index, mailbox);
194 if (err)
195 goto out;
196
197 memset(mgm, 0, sizeof *mgm);
198 memcpy(mgm->gid, gid, 16);
199 }
200
201 members_count = be32_to_cpu(mgm->members_count);
202 if (members_count == MLX4_QP_PER_MGM) {
203 mlx4_err(dev, "MGM at index %x is full.\n", index);
204 err = -ENOMEM;
205 goto out;
206 }
207
208 for (i = 0; i < members_count; ++i)
209 if (mgm->qp[i] == cpu_to_be32(qp->qpn)) {
210 mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn);
211 err = 0;
212 goto out;
213 }
214
215 mgm->qp[members_count++] = cpu_to_be32(qp->qpn);
216 mgm->members_count = cpu_to_be32(members_count);
217
218 err = mlx4_WRITE_MCG(dev, index, mailbox);
219 if (err)
220 goto out;
221
222 if (!link)
223 goto out;
224
225 err = mlx4_READ_MCG(dev, prev, mailbox);
226 if (err)
227 goto out;
228
229 mgm->next_gid_index = cpu_to_be32(index << 6);
230
231 err = mlx4_WRITE_MCG(dev, prev, mailbox);
232 if (err)
233 goto out;
234
235out:
236 if (err && link && index != -1) {
237 if (index < dev->caps.num_mgms)
238			mlx4_warn(dev, "Got AMGM index %d < %d\n",
239 index, dev->caps.num_mgms);
240 else
241 mlx4_bitmap_free(&priv->mcg_table.bitmap,
242 index - dev->caps.num_mgms);
243 }
244 mutex_unlock(&priv->mcg_table.mutex);
245
246 mlx4_free_cmd_mailbox(dev, mailbox);
247 return err;
248}
249EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
250
251int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
252{
253 struct mlx4_priv *priv = mlx4_priv(dev);
254 struct mlx4_cmd_mailbox *mailbox;
255 struct mlx4_mgm *mgm;
256 u32 members_count;
257 u16 hash;
258 int prev, index;
259 int i, loc;
260 int err;
261
262 mailbox = mlx4_alloc_cmd_mailbox(dev);
263 if (IS_ERR(mailbox))
264 return PTR_ERR(mailbox);
265 mgm = mailbox->buf;
266
267 mutex_lock(&priv->mcg_table.mutex);
268
269 err = find_mgm(dev, gid, mailbox, &hash, &prev, &index);
270 if (err)
271 goto out;
272
273 if (index == -1) {
274 mlx4_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
275 "not found\n",
276 be16_to_cpu(((__be16 *) gid)[0]),
277 be16_to_cpu(((__be16 *) gid)[1]),
278 be16_to_cpu(((__be16 *) gid)[2]),
279 be16_to_cpu(((__be16 *) gid)[3]),
280 be16_to_cpu(((__be16 *) gid)[4]),
281 be16_to_cpu(((__be16 *) gid)[5]),
282 be16_to_cpu(((__be16 *) gid)[6]),
283 be16_to_cpu(((__be16 *) gid)[7]));
284 err = -EINVAL;
285 goto out;
286 }
287
288 members_count = be32_to_cpu(mgm->members_count);
289 for (loc = -1, i = 0; i < members_count; ++i)
290 if (mgm->qp[i] == cpu_to_be32(qp->qpn))
291 loc = i;
292
293 if (loc == -1) {
294 mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn);
295 err = -EINVAL;
296 goto out;
297 }
298
299
300 mgm->members_count = cpu_to_be32(--members_count);
301 mgm->qp[loc] = mgm->qp[i - 1];
302 mgm->qp[i - 1] = 0;
303
304 err = mlx4_WRITE_MCG(dev, index, mailbox);
305 if (err)
306 goto out;
307
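	/* If the pre-removal count wasn't 1, members remain; keep the entry. */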
308 if (i != 1)
309 goto out;
310
311 if (prev == -1) {
312 /* Remove entry from MGM */
313 int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6;
314 if (amgm_index) {
315 err = mlx4_READ_MCG(dev, amgm_index, mailbox);
316 if (err)
317 goto out;
318 } else
319 memset(mgm->gid, 0, 16);
320
321 err = mlx4_WRITE_MCG(dev, index, mailbox);
322 if (err)
323 goto out;
324
325 if (amgm_index) {
326 if (amgm_index < dev->caps.num_mgms)
327				mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d\n",
328 index, amgm_index, dev->caps.num_mgms);
329 else
330 mlx4_bitmap_free(&priv->mcg_table.bitmap,
331 amgm_index - dev->caps.num_mgms);
332 }
333 } else {
334 /* Remove entry from AMGM */
335 int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
336 err = mlx4_READ_MCG(dev, prev, mailbox);
337 if (err)
338 goto out;
339
340 mgm->next_gid_index = cpu_to_be32(cur_next_index << 6);
341
342 err = mlx4_WRITE_MCG(dev, prev, mailbox);
343 if (err)
344 goto out;
345
346 if (index < dev->caps.num_mgms)
347			mlx4_warn(dev, "entry %d had next AMGM index %d < %d\n",
348 prev, index, dev->caps.num_mgms);
349 else
350 mlx4_bitmap_free(&priv->mcg_table.bitmap,
351 index - dev->caps.num_mgms);
352 }
353
354out:
355 mutex_unlock(&priv->mcg_table.mutex);
356
357 mlx4_free_cmd_mailbox(dev, mailbox);
358 return err;
359}
360EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
361
362int __devinit mlx4_init_mcg_table(struct mlx4_dev *dev)
363{
364 struct mlx4_priv *priv = mlx4_priv(dev);
365 int err;
366
367 err = mlx4_bitmap_init(&priv->mcg_table.bitmap,
368 dev->caps.num_amgms, dev->caps.num_amgms - 1, 0);
369 if (err)
370 return err;
371
372 mutex_init(&priv->mcg_table.mutex);
373
374 return 0;
375}
376
377void mlx4_cleanup_mcg_table(struct mlx4_dev *dev)
378{
379 mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
380}
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
new file mode 100644
index 000000000000..9befbae3d196
--- /dev/null
+++ b/drivers/net/mlx4/mlx4.h
@@ -0,0 +1,348 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#ifndef MLX4_H
38#define MLX4_H
39
40#include <linux/radix-tree.h>
41
42#include <linux/mlx4/device.h>
43#include <linux/mlx4/doorbell.h>
44
45#define DRV_NAME "mlx4_core"
46#define PFX DRV_NAME ": "
47#define DRV_VERSION "0.01"
48#define DRV_RELDATE "May 1, 2007"
49
50enum {
51 MLX4_HCR_BASE = 0x80680,
52 MLX4_HCR_SIZE = 0x0001c,
53 MLX4_CLR_INT_SIZE = 0x00008
54};
55
56enum {
57 MLX4_BOARD_ID_LEN = 64
58};
59
60enum {
61 MLX4_MGM_ENTRY_SIZE = 0x40,
62 MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
63 MLX4_MTT_ENTRY_PER_SEG = 8
64};
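/*
 * Arithmetic behind MLX4_QP_PER_MGM: an MGM entry is 0x40 = 64 bytes;
 * the first 16-byte chunk holds next_gid_index/members_count/reserved
 * and the second the GID, leaving 64/16 - 2 = 2 chunks of four 32-bit
 * QPNs each, i.e. 8 QPs per entry.
 */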
65
66enum {
67 MLX4_EQ_ASYNC,
68 MLX4_EQ_COMP,
69 MLX4_EQ_CATAS,
70 MLX4_NUM_EQ
71};
72
73enum {
74 MLX4_NUM_PDS = 1 << 15
75};
76
77enum {
78 MLX4_CMPT_TYPE_QP = 0,
79 MLX4_CMPT_TYPE_SRQ = 1,
80 MLX4_CMPT_TYPE_CQ = 2,
81 MLX4_CMPT_TYPE_EQ = 3,
82 MLX4_CMPT_NUM_TYPE
83};
84
85enum {
86 MLX4_CMPT_SHIFT = 24,
87 MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
88};
89
90#ifdef CONFIG_MLX4_DEBUG
91extern int mlx4_debug_level;
92
93#define mlx4_dbg(mdev, format, arg...) \
94 do { \
95 if (mlx4_debug_level) \
96 dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
97 } while (0)
98
99#else /* CONFIG_MLX4_DEBUG */
100
101#define mlx4_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
102
103#endif /* CONFIG_MLX4_DEBUG */
104
105#define mlx4_err(mdev, format, arg...) \
106 dev_err(&mdev->pdev->dev, format, ## arg)
107#define mlx4_info(mdev, format, arg...) \
108 dev_info(&mdev->pdev->dev, format, ## arg)
109#define mlx4_warn(mdev, format, arg...) \
110 dev_warn(&mdev->pdev->dev, format, ## arg)
111
112struct mlx4_bitmap {
113 u32 last;
114 u32 top;
115 u32 max;
116 u32 mask;
117 spinlock_t lock;
118 unsigned long *table;
119};
120
121struct mlx4_buddy {
122 unsigned long **bits;
123 int max_order;
124 spinlock_t lock;
125};
126
127struct mlx4_icm;
128
129struct mlx4_icm_table {
130 u64 virt;
131 int num_icm;
132 int num_obj;
133 int obj_size;
134 int lowmem;
135 struct mutex mutex;
136 struct mlx4_icm **icm;
137};
138
139struct mlx4_eq {
140 struct mlx4_dev *dev;
141 void __iomem *doorbell;
142 int eqn;
143 u32 cons_index;
144 u16 irq;
145 u16 have_irq;
146 int nent;
147 struct mlx4_buf_list *page_list;
148 struct mlx4_mtt mtt;
149};
150
151struct mlx4_profile {
152 int num_qp;
153 int rdmarc_per_qp;
154 int num_srq;
155 int num_cq;
156 int num_mcg;
157 int num_mpt;
158 int num_mtt;
159};
160
161struct mlx4_fw {
162 u64 clr_int_base;
163 u64 catas_offset;
164 struct mlx4_icm *fw_icm;
165 struct mlx4_icm *aux_icm;
166 u32 catas_size;
167 u16 fw_pages;
168 u8 clr_int_bar;
169 u8 catas_bar;
170};
171
172struct mlx4_cmd {
173 struct pci_pool *pool;
174 void __iomem *hcr;
175 struct mutex hcr_mutex;
176 struct semaphore poll_sem;
177 struct semaphore event_sem;
178 int max_cmds;
179 spinlock_t context_lock;
180 int free_head;
181 struct mlx4_cmd_context *context;
182 u16 token_mask;
183 u8 use_events;
184 u8 toggle;
185};
186
187struct mlx4_uar_table {
188 struct mlx4_bitmap bitmap;
189};
190
191struct mlx4_mr_table {
192 struct mlx4_bitmap mpt_bitmap;
193 struct mlx4_buddy mtt_buddy;
194 u64 mtt_base;
195 u64 mpt_base;
196 struct mlx4_icm_table mtt_table;
197 struct mlx4_icm_table dmpt_table;
198};
199
200struct mlx4_cq_table {
201 struct mlx4_bitmap bitmap;
202 spinlock_t lock;
203 struct radix_tree_root tree;
204 struct mlx4_icm_table table;
205 struct mlx4_icm_table cmpt_table;
206};
207
208struct mlx4_eq_table {
209 struct mlx4_bitmap bitmap;
210 void __iomem *clr_int;
211 void __iomem *uar_map[(MLX4_NUM_EQ + 6) / 4];
212 u32 clr_mask;
213 struct mlx4_eq eq[MLX4_NUM_EQ];
214 u64 icm_virt;
215 struct page *icm_page;
216 dma_addr_t icm_dma;
217 struct mlx4_icm_table cmpt_table;
218 int have_irq;
219 u8 inta_pin;
220};
221
222struct mlx4_srq_table {
223 struct mlx4_bitmap bitmap;
224 spinlock_t lock;
225 struct radix_tree_root tree;
226 struct mlx4_icm_table table;
227 struct mlx4_icm_table cmpt_table;
228};
229
230struct mlx4_qp_table {
231 struct mlx4_bitmap bitmap;
232 u32 rdmarc_base;
233 int rdmarc_shift;
234 spinlock_t lock;
235 struct mlx4_icm_table qp_table;
236 struct mlx4_icm_table auxc_table;
237 struct mlx4_icm_table altc_table;
238 struct mlx4_icm_table rdmarc_table;
239 struct mlx4_icm_table cmpt_table;
240};
241
242struct mlx4_mcg_table {
243 struct mutex mutex;
244 struct mlx4_bitmap bitmap;
245 struct mlx4_icm_table table;
246};
247
248struct mlx4_catas_err {
249 u32 __iomem *map;
250 int size;
251};
252
253struct mlx4_priv {
254 struct mlx4_dev dev;
255
256 struct list_head dev_list;
257 struct list_head ctx_list;
258 spinlock_t ctx_lock;
259
260 struct mlx4_fw fw;
261 struct mlx4_cmd cmd;
262
263 struct mlx4_bitmap pd_bitmap;
264 struct mlx4_uar_table uar_table;
265 struct mlx4_mr_table mr_table;
266 struct mlx4_cq_table cq_table;
267 struct mlx4_eq_table eq_table;
268 struct mlx4_srq_table srq_table;
269 struct mlx4_qp_table qp_table;
270 struct mlx4_mcg_table mcg_table;
271
272 struct mlx4_catas_err catas_err;
273
274 void __iomem *clr_base;
275
276 struct mlx4_uar driver_uar;
277 void __iomem *kar;
278 MLX4_DECLARE_DOORBELL_LOCK(doorbell_lock)
279
280 u32 rev_id;
281 char board_id[MLX4_BOARD_ID_LEN];
282};
283
284static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
285{
286 return container_of(dev, struct mlx4_priv, dev);
287}
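/*
 * This container_of() works because struct mlx4_dev is embedded in
 * struct mlx4_priv: mlx4_init_one() allocates the priv and hands out
 * &priv->dev, so any mlx4_dev pointer a consumer passes back maps to
 * the private state with no lookup, e.g.:
 *
 *	struct mlx4_priv *priv = mlx4_priv(dev);
 *	spin_lock_irq(&priv->ctx_lock);
 */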
288
289u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
290void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
291int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved);
292void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
293
294int mlx4_reset(struct mlx4_dev *dev);
295
296int mlx4_init_pd_table(struct mlx4_dev *dev);
297int mlx4_init_uar_table(struct mlx4_dev *dev);
298int mlx4_init_mr_table(struct mlx4_dev *dev);
299int mlx4_init_eq_table(struct mlx4_dev *dev);
300int mlx4_init_cq_table(struct mlx4_dev *dev);
301int mlx4_init_qp_table(struct mlx4_dev *dev);
302int mlx4_init_srq_table(struct mlx4_dev *dev);
303int mlx4_init_mcg_table(struct mlx4_dev *dev);
304
305void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
306void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
307void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
308void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
309void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
310void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
311void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
312void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
313
314void mlx4_map_catas_buf(struct mlx4_dev *dev);
315void mlx4_unmap_catas_buf(struct mlx4_dev *dev);
316
317int mlx4_register_device(struct mlx4_dev *dev);
318void mlx4_unregister_device(struct mlx4_dev *dev);
319void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
320 int subtype, int port);
321
322struct mlx4_dev_cap;
323struct mlx4_init_hca_param;
324
325u64 mlx4_make_profile(struct mlx4_dev *dev,
326 struct mlx4_profile *request,
327 struct mlx4_dev_cap *dev_cap,
328 struct mlx4_init_hca_param *init_hca);
329
330int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt);
331void mlx4_unmap_eq_icm(struct mlx4_dev *dev);
332
333int mlx4_cmd_init(struct mlx4_dev *dev);
334void mlx4_cmd_cleanup(struct mlx4_dev *dev);
335void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
336int mlx4_cmd_use_events(struct mlx4_dev *dev);
337void mlx4_cmd_use_polling(struct mlx4_dev *dev);
338
339void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
340void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
341
342void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
343
344void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
345
346void mlx4_handle_catas_err(struct mlx4_dev *dev);
347
348#endif /* MLX4_H */
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
new file mode 100644
index 000000000000..b33864dab179
--- /dev/null
+++ b/drivers/net/mlx4/mr.c
@@ -0,0 +1,479 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <linux/init.h>
36#include <linux/errno.h>
37
38#include <linux/mlx4/cmd.h>
39
40#include "mlx4.h"
41#include "icm.h"
42
43/*
44 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
45 */
46struct mlx4_mpt_entry {
47 __be32 flags;
48 __be32 qpn;
49 __be32 key;
50 __be32 pd;
51 __be64 start;
52 __be64 length;
53 __be32 lkey;
54 __be32 win_cnt;
55 u8 reserved1[3];
56 u8 mtt_rep;
57 __be64 mtt_seg;
58 __be32 mtt_sz;
59 __be32 entity_size;
60 __be32 first_byte_offset;
61} __attribute__((packed));
62
63#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
64#define MLX4_MPT_FLAG_MIO (1 << 17)
65#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
66#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
67#define MLX4_MPT_FLAG_REGION (1 << 8)
68
69#define MLX4_MTT_FLAG_PRESENT 1
70
71static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
72{
73 int o;
74 int m;
75 u32 seg;
76
77 spin_lock(&buddy->lock);
78
79 for (o = order; o <= buddy->max_order; ++o) {
80 m = 1 << (buddy->max_order - o);
81 seg = find_first_bit(buddy->bits[o], m);
82 if (seg < m)
83 goto found;
84 }
85
86 spin_unlock(&buddy->lock);
87 return -1;
88
89 found:
90 clear_bit(seg, buddy->bits[o]);
91
92 while (o > order) {
93 --o;
94 seg <<= 1;
95 set_bit(seg ^ 1, buddy->bits[o]);
96 }
97
98 spin_unlock(&buddy->lock);
99
100 seg <<= order;
101
102 return seg;
103}
104
105static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
106{
107 seg >>= order;
108
109 spin_lock(&buddy->lock);
110
111 while (test_bit(seg ^ 1, buddy->bits[order])) {
112 clear_bit(seg ^ 1, buddy->bits[order]);
113 seg >>= 1;
114 ++order;
115 }
116
117 set_bit(seg, buddy->bits[order]);
118
119 spin_unlock(&buddy->lock);
120}
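/*
 * Worked example: with max_order = 2 the allocator starts with one free
 * order-2 block (bit 0 of bits[2]).  mlx4_buddy_alloc(buddy, 0) finds
 * it, clears it, and splits twice on the way down, marking each buddy
 * free (set_bit(1, bits[1]), set_bit(1, bits[0])) before returning
 * segment 0.  mlx4_buddy_free(buddy, 0, 0) then remerges: the buddy bit
 * is set at orders 0 and 1, so both are absorbed and bit 0 of bits[2]
 * is set again, restoring the initial state.
 */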
121
122static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
123{
124 int i, s;
125
126 buddy->max_order = max_order;
127 spin_lock_init(&buddy->lock);
128
129 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
130 GFP_KERNEL);
131 if (!buddy->bits)
132 goto err_out;
133
134 for (i = 0; i <= buddy->max_order; ++i) {
135 s = BITS_TO_LONGS(1 << (buddy->max_order - i));
136 buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
137 if (!buddy->bits[i])
138 goto err_out_free;
139 bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
140 }
141
142 set_bit(0, buddy->bits[buddy->max_order]);
143
144 return 0;
145
146err_out_free:
147 for (i = 0; i <= buddy->max_order; ++i)
148 kfree(buddy->bits[i]);
149
150 kfree(buddy->bits);
151
152err_out:
153 return -ENOMEM;
154}
155
156static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
157{
158 int i;
159
160 for (i = 0; i <= buddy->max_order; ++i)
161 kfree(buddy->bits[i]);
162
163 kfree(buddy->bits);
164}
165
166static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
167{
168 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
169 u32 seg;
170
171 seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
172 if (seg == -1)
173 return -1;
174
175 if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
176 seg + (1 << order) - 1)) {
177 mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
178 return -1;
179 }
180
181 return seg;
182}
183
184int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
185 struct mlx4_mtt *mtt)
186{
187 int i;
188
189 if (!npages) {
190 mtt->order = -1;
191 mtt->page_shift = MLX4_ICM_PAGE_SHIFT;
192 return 0;
193 } else
194 mtt->page_shift = page_shift;
195
196 for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1)
197 ++mtt->order;
198
199 mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
200 if (mtt->first_seg == -1)
201 return -ENOMEM;
202
203 return 0;
204}
205EXPORT_SYMBOL_GPL(mlx4_mtt_init);
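/*
 * MTTs are handed out in segments of MLX4_MTT_ENTRY_PER_SEG = 8
 * entries, and the buddy allocator works in power-of-two segment
 * counts, so the loop above picks the smallest order with
 * (1 << order) * 8 >= npages; e.g. npages = 100 gives order 4, i.e.
 * 16 segments = 128 MTT entries.
 */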
206
207void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
208{
209 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
210
211 if (mtt->order < 0)
212 return;
213
214 mlx4_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
215 mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg,
216 mtt->first_seg + (1 << mtt->order) - 1);
217}
218EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
219
220u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
221{
222 return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
223}
224EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
225
226static u32 hw_index_to_key(u32 ind)
227{
228 return (ind >> 24) | (ind << 8);
229}
230
231static u32 key_to_hw_index(u32 key)
232{
233 return (key << 24) | (key >> 8);
234}
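/*
 * These two helpers are inverse 8-bit rotations of a 32-bit word:
 * hw_index_to_key() rotates the MPT table index left by 8 (index 5
 * becomes key 0x00000500) and key_to_hw_index() rotates right by 8 to
 * recover it.  Wherever a command needs the raw table index, the result
 * is masked with (dev->caps.num_mpts - 1), as in the calls below.
 */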
235
236static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
237 int mpt_index)
238{
239 return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
240 MLX4_CMD_TIME_CLASS_B);
241}
242
243static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
244 int mpt_index)
245{
246 return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
247 !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
248}
249
250int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
251 int npages, int page_shift, struct mlx4_mr *mr)
252{
253 struct mlx4_priv *priv = mlx4_priv(dev);
254 u32 index;
255 int err;
256
257 index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
258 if (index == -1) {
259 err = -ENOMEM;
260 goto err;
261 }
262
263 mr->iova = iova;
264 mr->size = size;
265 mr->pd = pd;
266 mr->access = access;
267 mr->enabled = 0;
268 mr->key = hw_index_to_key(index);
269
270 err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
271 if (err)
272 goto err_index;
273
274 return 0;
275
276err_index:
277 mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
278
279err:
280	/* mr is caller-owned (typically embedded in a larger struct); don't free it here */
281 return err;
282}
283EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
284
285void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
286{
287 struct mlx4_priv *priv = mlx4_priv(dev);
288 int err;
289
290 if (mr->enabled) {
291 err = mlx4_HW2SW_MPT(dev, NULL,
292 key_to_hw_index(mr->key) &
293 (dev->caps.num_mpts - 1));
294 if (err)
295 mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
296 }
297
298 mlx4_mtt_cleanup(dev, &mr->mtt);
299 mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
300}
301EXPORT_SYMBOL_GPL(mlx4_mr_free);
302
303int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
304{
305 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
306 struct mlx4_cmd_mailbox *mailbox;
307 struct mlx4_mpt_entry *mpt_entry;
308 int err;
309
310 err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
311 if (err)
312 return err;
313
314 mailbox = mlx4_alloc_cmd_mailbox(dev);
315 if (IS_ERR(mailbox)) {
316 err = PTR_ERR(mailbox);
317 goto err_table;
318 }
319 mpt_entry = mailbox->buf;
320
321 memset(mpt_entry, 0, sizeof *mpt_entry);
322
323 mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS |
324 MLX4_MPT_FLAG_MIO |
325 MLX4_MPT_FLAG_REGION |
326 mr->access);
327 if (mr->mtt.order < 0)
328 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
329
330 mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key));
331 mpt_entry->pd = cpu_to_be32(mr->pd);
332 mpt_entry->start = cpu_to_be64(mr->iova);
333 mpt_entry->length = cpu_to_be64(mr->size);
334 mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
335 mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
336
337 err = mlx4_SW2HW_MPT(dev, mailbox,
338 key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
339 if (err) {
340 mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
341 goto err_cmd;
342 }
343
344 mr->enabled = 1;
345
346 mlx4_free_cmd_mailbox(dev, mailbox);
347
348 return 0;
349
350err_cmd:
351 mlx4_free_cmd_mailbox(dev, mailbox);
352
353err_table:
354 mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
355 return err;
356}
357EXPORT_SYMBOL_GPL(mlx4_mr_enable);
358
359static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
360 int num_mtt)
361{
362 return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT,
363 MLX4_CMD_TIME_CLASS_B);
364}
365
366int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
367 int start_index, int npages, u64 *page_list)
368{
369 struct mlx4_cmd_mailbox *mailbox;
370 __be64 *mtt_entry;
371 int i;
372 int err = 0;
373
374 if (mtt->order < 0)
375 return -EINVAL;
376
377 mailbox = mlx4_alloc_cmd_mailbox(dev);
378 if (IS_ERR(mailbox))
379 return PTR_ERR(mailbox);
380
381 mtt_entry = mailbox->buf;
382
383 while (npages > 0) {
384 mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8);
385 mtt_entry[1] = 0;
386
387 for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i)
388 mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
389 MLX4_MTT_FLAG_PRESENT);
390
391 /*
392 * If we have an odd number of entries to write, add
393 * one more dummy entry for firmware efficiency.
394 */
395 if (i & 1)
396 mtt_entry[i + 2] = 0;
397
398 err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
399 if (err)
400 goto out;
401
402 npages -= i;
403 start_index += i;
404 page_list += i;
405 }
406
407out:
408 mlx4_free_cmd_mailbox(dev, mailbox);
409
410 return err;
411}
412EXPORT_SYMBOL_GPL(mlx4_write_mtt);
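/*
 * Mailbox layout used above: entry 0 carries the 64-bit ICM address of
 * the first MTT to write (segment base + start_index * 8 bytes), entry
 * 1 is reserved, and entries 2..N hold page DMA addresses tagged with
 * MLX4_MTT_FLAG_PRESENT.  At most MLX4_MAILBOX_SIZE / 8 - 2 MTTs fit
 * per command, so long page lists go out in chunks, padded to an even
 * count for the firmware.
 */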
413
414int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
415 struct mlx4_buf *buf)
416{
417 u64 *page_list;
418 int err;
419 int i;
420
421 page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
422 if (!page_list)
423 return -ENOMEM;
424
425 for (i = 0; i < buf->npages; ++i)
426 if (buf->nbufs == 1)
427 page_list[i] = buf->u.direct.map + (i << buf->page_shift);
428 else
429 page_list[i] = buf->u.page_list[i].map;
430
431 err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
432
433 kfree(page_list);
434 return err;
435}
436EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
437
438int __devinit mlx4_init_mr_table(struct mlx4_dev *dev)
439{
440 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
441 int err;
442
443 err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
444 ~0, dev->caps.reserved_mrws);
445 if (err)
446 return err;
447
448 err = mlx4_buddy_init(&mr_table->mtt_buddy,
449 ilog2(dev->caps.num_mtt_segs));
450 if (err)
451 goto err_buddy;
452
453 if (dev->caps.reserved_mtts) {
454 if (mlx4_alloc_mtt_range(dev, ilog2(dev->caps.reserved_mtts)) == -1) {
455 mlx4_warn(dev, "MTT table of order %d is too small.\n",
456 mr_table->mtt_buddy.max_order);
457 err = -ENOMEM;
458 goto err_reserve_mtts;
459 }
460 }
461
462 return 0;
463
464err_reserve_mtts:
465 mlx4_buddy_cleanup(&mr_table->mtt_buddy);
466
467err_buddy:
468 mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
469
470 return err;
471}
472
473void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
474{
475 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
476
477 mlx4_buddy_cleanup(&mr_table->mtt_buddy);
478 mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
479}
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
new file mode 100644
index 000000000000..23dea1ee7750
--- /dev/null
+++ b/drivers/net/mlx4/pd.c
@@ -0,0 +1,102 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/init.h>
35#include <linux/errno.h>
36
37#include <asm/page.h>
38
39#include "mlx4.h"
40#include "icm.h"
41
42int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn)
43{
44 struct mlx4_priv *priv = mlx4_priv(dev);
45
46 *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap);
47 if (*pdn == -1)
48 return -ENOMEM;
49
50 return 0;
51}
52EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
53
54void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
55{
56 mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn);
57}
58EXPORT_SYMBOL_GPL(mlx4_pd_free);
59
60int __devinit mlx4_init_pd_table(struct mlx4_dev *dev)
61{
62 struct mlx4_priv *priv = mlx4_priv(dev);
63
64 return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
65 (1 << 24) - 1, dev->caps.reserved_pds);
66}
67
68void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
69{
70 mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
71}
72
73
74int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
75{
76 uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
77 if (uar->index == -1)
78 return -ENOMEM;
79
80 uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
81
82 return 0;
83}
84EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
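mlx4_uar_alloc derives a UAR's page frame number directly from BAR 2: the BAR's base pfn plus the allocated index, one page per UAR. A standalone sketch of that arithmetic, assuming a BAR address and 4 KB pages for illustration:

/* Sketch: UAR index -> pfn mapping.  The BAR base and index are
 * illustrative values, not read from real hardware. */
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KB pages assumed */

int main(void)
{
	unsigned long long bar2_base = 0xd0000000ULL;	/* assumed BAR 2 start */
	int index = 5;					/* an allocated UAR index */
	unsigned long long pfn = (bar2_base >> PAGE_SHIFT) + index;

	printf("UAR %d: pfn 0x%llx, bus address 0x%llx\n",
	       index, pfn, pfn << PAGE_SHIFT);
	return 0;
}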
85
86void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
87{
88 mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index);
89}
90EXPORT_SYMBOL_GPL(mlx4_uar_free);
91
92int mlx4_init_uar_table(struct mlx4_dev *dev)
93{
94 return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
95 dev->caps.num_uars, dev->caps.num_uars - 1,
96 max(128, dev->caps.reserved_uars));
97}
98
99void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
100{
101 mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap);
102}
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
new file mode 100644
index 000000000000..9ca42b213d54
--- /dev/null
+++ b/drivers/net/mlx4/profile.c
@@ -0,0 +1,238 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <linux/init.h>
36
37#include "mlx4.h"
38#include "fw.h"
39
40enum {
41 MLX4_RES_QP,
42 MLX4_RES_RDMARC,
43 MLX4_RES_ALTC,
44 MLX4_RES_AUXC,
45 MLX4_RES_SRQ,
46 MLX4_RES_CQ,
47 MLX4_RES_EQ,
48 MLX4_RES_DMPT,
49 MLX4_RES_CMPT,
50 MLX4_RES_MTT,
51 MLX4_RES_MCG,
52 MLX4_RES_NUM
53};
54
55static const char *res_name[] = {
56 [MLX4_RES_QP] = "QP",
57 [MLX4_RES_RDMARC] = "RDMARC",
58 [MLX4_RES_ALTC] = "ALTC",
59 [MLX4_RES_AUXC] = "AUXC",
60 [MLX4_RES_SRQ] = "SRQ",
61 [MLX4_RES_CQ] = "CQ",
62 [MLX4_RES_EQ] = "EQ",
63 [MLX4_RES_DMPT] = "DMPT",
64 [MLX4_RES_CMPT] = "CMPT",
65 [MLX4_RES_MTT] = "MTT",
66 [MLX4_RES_MCG] = "MCG",
67};
68
69u64 mlx4_make_profile(struct mlx4_dev *dev,
70 struct mlx4_profile *request,
71 struct mlx4_dev_cap *dev_cap,
72 struct mlx4_init_hca_param *init_hca)
73{
74 struct mlx4_priv *priv = mlx4_priv(dev);
75 struct mlx4_resource {
76 u64 size;
77 u64 start;
78 int type;
79 int num;
80 int log_num;
81 };
82
83 u64 total_size = 0;
84 struct mlx4_resource *profile;
85 struct mlx4_resource tmp;
86 int i, j;
87
88 profile = kzalloc(MLX4_RES_NUM * sizeof *profile, GFP_KERNEL);
89 if (!profile)
90 return -ENOMEM;
91
92 profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz;
93 profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz;
94 profile[MLX4_RES_ALTC].size = dev_cap->altc_entry_sz;
95 profile[MLX4_RES_AUXC].size = dev_cap->aux_entry_sz;
96 profile[MLX4_RES_SRQ].size = dev_cap->srq_entry_sz;
97 profile[MLX4_RES_CQ].size = dev_cap->cqc_entry_sz;
98 profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz;
99 profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz;
100 profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz;
101 profile[MLX4_RES_MTT].size = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
102 profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE;
103
104 profile[MLX4_RES_QP].num = request->num_qp;
105 profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp;
106 profile[MLX4_RES_ALTC].num = request->num_qp;
107 profile[MLX4_RES_AUXC].num = request->num_qp;
108 profile[MLX4_RES_SRQ].num = request->num_srq;
109 profile[MLX4_RES_CQ].num = request->num_cq;
110 profile[MLX4_RES_EQ].num = MLX4_NUM_EQ + dev_cap->reserved_eqs;
111 profile[MLX4_RES_DMPT].num = request->num_mpt;
112 profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
113 profile[MLX4_RES_MTT].num = request->num_mtt;
114 profile[MLX4_RES_MCG].num = request->num_mcg;
115
116 for (i = 0; i < MLX4_RES_NUM; ++i) {
117 profile[i].type = i;
118 profile[i].num = roundup_pow_of_two(profile[i].num);
119 profile[i].log_num = ilog2(profile[i].num);
120 profile[i].size *= profile[i].num;
121 profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
122 }
123
124 /*
125 * Sort the resources in decreasing order of size. Since they
126 * all have sizes that are powers of 2, we'll be able to keep
127 * resources aligned to their size and pack them without gaps
128 * using the sorted order.
129 */
130 for (i = MLX4_RES_NUM; i > 0; --i)
131 for (j = 1; j < i; ++j) {
132 if (profile[j].size > profile[j - 1].size) {
133 tmp = profile[j];
134 profile[j] = profile[j - 1];
135 profile[j - 1] = tmp;
136 }
137 }
138
139 for (i = 0; i < MLX4_RES_NUM; ++i) {
140 if (profile[i].size) {
141 profile[i].start = total_size;
142 total_size += profile[i].size;
143 }
144
145 if (total_size > dev_cap->max_icm_sz) {
146 mlx4_err(dev, "Profile requires 0x%llx bytes; "
147 "won't fit in 0x%llx bytes of context memory.\n",
148 (unsigned long long) total_size,
149 (unsigned long long) dev_cap->max_icm_sz);
150 kfree(profile);
151 return -ENOMEM;
152 }
153
154 if (profile[i].size)
155 mlx4_dbg(dev, " profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, "
156 "size 0x%10llx\n",
157 i, res_name[profile[i].type], profile[i].log_num,
158 (unsigned long long) profile[i].start,
159 (unsigned long long) profile[i].size);
160 }
161
162 mlx4_dbg(dev, "HCA context memory: reserving %d KB\n",
163 (int) (total_size >> 10));
164
165 for (i = 0; i < MLX4_RES_NUM; ++i) {
166 switch (profile[i].type) {
167 case MLX4_RES_QP:
168 dev->caps.num_qps = profile[i].num;
169 init_hca->qpc_base = profile[i].start;
170 init_hca->log_num_qps = profile[i].log_num;
171 break;
172 case MLX4_RES_RDMARC:
173 for (priv->qp_table.rdmarc_shift = 0;
174 request->num_qp << priv->qp_table.rdmarc_shift < profile[i].num;
175 ++priv->qp_table.rdmarc_shift)
176 ; /* nothing */
177 dev->caps.max_qp_dest_rdma = 1 << priv->qp_table.rdmarc_shift;
178 priv->qp_table.rdmarc_base = (u32) profile[i].start;
179 init_hca->rdmarc_base = profile[i].start;
180 init_hca->log_rd_per_qp = priv->qp_table.rdmarc_shift;
181 break;
182 case MLX4_RES_ALTC:
183 init_hca->altc_base = profile[i].start;
184 break;
185 case MLX4_RES_AUXC:
186 init_hca->auxc_base = profile[i].start;
187 break;
188 case MLX4_RES_SRQ:
189 dev->caps.num_srqs = profile[i].num;
190 init_hca->srqc_base = profile[i].start;
191 init_hca->log_num_srqs = profile[i].log_num;
192 break;
193 case MLX4_RES_CQ:
194 dev->caps.num_cqs = profile[i].num;
195 init_hca->cqc_base = profile[i].start;
196 init_hca->log_num_cqs = profile[i].log_num;
197 break;
198 case MLX4_RES_EQ:
199 dev->caps.num_eqs = profile[i].num;
200 init_hca->eqc_base = profile[i].start;
201 init_hca->log_num_eqs = profile[i].log_num;
202 break;
203 case MLX4_RES_DMPT:
204 dev->caps.num_mpts = profile[i].num;
205 priv->mr_table.mpt_base = profile[i].start;
206 init_hca->dmpt_base = profile[i].start;
207 init_hca->log_mpt_sz = profile[i].log_num;
208 break;
209 case MLX4_RES_CMPT:
210 init_hca->cmpt_base = profile[i].start;
211 break;
212 case MLX4_RES_MTT:
213 dev->caps.num_mtt_segs = profile[i].num;
214 priv->mr_table.mtt_base = profile[i].start;
215 init_hca->mtt_base = profile[i].start;
216 break;
217 case MLX4_RES_MCG:
218 dev->caps.num_mgms = profile[i].num >> 1;
219 dev->caps.num_amgms = profile[i].num >> 1;
220 init_hca->mc_base = profile[i].start;
221 init_hca->log_mc_entry_sz = ilog2(MLX4_MGM_ENTRY_SIZE);
222 init_hca->log_mc_table_sz = profile[i].log_num;
223 init_hca->log_mc_hash_sz = profile[i].log_num - 1;
224 break;
225 default:
226 break;
227 }
228 }
229
230 /*
231 * PDs don't take any HCA memory, but we assign them as part
232 * of the HCA profile anyway.
233 */
234 dev->caps.num_pds = MLX4_NUM_PDS;
235
236 kfree(profile);
237 return total_size;
238}
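The sort in mlx4_make_profile relies on every region size having been rounded to a power of two: laid out largest-first, each region's start offset is automatically a multiple of its own size, so the ICM layout packs with no padding. A minimal userspace sketch of the same packing argument (the sizes are illustrative):

/* Sketch: packing power-of-two regions in decreasing size order keeps
 * every region naturally aligned with no gaps.  Not driver code. */
#include <stdio.h>
#include <stdlib.h>

static int cmp_desc(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *)a;
	unsigned long long y = *(const unsigned long long *)b;

	return x < y ? 1 : x > y ? -1 : 0;
}

int main(void)
{
	unsigned long long size[] = { 4096, 65536, 4096, 16384 };
	unsigned long long start = 0;
	int i, n = sizeof size / sizeof size[0];

	qsort(size, n, sizeof size[0], cmp_desc);
	for (i = 0; i < n; ++i) {
		printf("region %d: start 0x%05llx size 0x%05llx %s\n", i, start,
		       size[i], start % size[i] ? "MISALIGNED" : "aligned");
		start += size[i];
	}
	return 0;
}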
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
new file mode 100644
index 000000000000..7f8b7d55b6e1
--- /dev/null
+++ b/drivers/net/mlx4/qp.c
@@ -0,0 +1,280 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/init.h>
37
38#include <linux/mlx4/cmd.h>
39#include <linux/mlx4/qp.h>
40
41#include "mlx4.h"
42#include "icm.h"
43
44void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
45{
46 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
47 struct mlx4_qp *qp;
48
49 spin_lock(&qp_table->lock);
50
51 qp = __mlx4_qp_lookup(dev, qpn);
52 if (qp)
53 atomic_inc(&qp->refcount);
54
55 spin_unlock(&qp_table->lock);
56
57 if (!qp) {
58 mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn);
59 return;
60 }
61
62 qp->event(qp, event_type);
63
64 if (atomic_dec_and_test(&qp->refcount))
65 complete(&qp->free);
66}
67
68int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
69 enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
70 struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
71 int sqd_event, struct mlx4_qp *qp)
72{
73 static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
74 [MLX4_QP_STATE_RST] = {
75 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
76 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
77 [MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP,
78 },
79 [MLX4_QP_STATE_INIT] = {
80 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
81 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
82 [MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP,
83 [MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP,
84 },
85 [MLX4_QP_STATE_RTR] = {
86 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
87 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
88 [MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP,
89 },
90 [MLX4_QP_STATE_RTS] = {
91 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
92 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
93 [MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP,
94 [MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP,
95 },
96 [MLX4_QP_STATE_SQD] = {
97 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
98 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
99 [MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP,
100 [MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP,
101 },
102 [MLX4_QP_STATE_SQER] = {
103 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
104 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
105 [MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP,
106 },
107 [MLX4_QP_STATE_ERR] = {
108 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
109 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
110 }
111 };
112
113 struct mlx4_cmd_mailbox *mailbox;
114 int ret = 0;
115
116 if (cur_state < 0 || cur_state >= MLX4_QP_NUM_STATE ||
117 new_state < 0 || new_state >= MLX4_QP_NUM_STATE ||
118 !op[cur_state][new_state])
119 return -EINVAL;
120
121 if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
122 return mlx4_cmd(dev, 0, qp->qpn, 2,
123 MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
124
125 mailbox = mlx4_alloc_cmd_mailbox(dev);
126 if (IS_ERR(mailbox))
127 return PTR_ERR(mailbox);
128
129 if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) {
130 u64 mtt_addr = mlx4_mtt_addr(dev, mtt);
131 context->mtt_base_addr_h = mtt_addr >> 32;
132 context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
133 context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
134 }
135
136 *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
137 memcpy(mailbox->buf + 8, context, sizeof *context);
138
139 ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
140 cpu_to_be32(qp->qpn);
141
142 ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
143 new_state == MLX4_QP_STATE_RST ? 2 : 0,
144 op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
145
146 mlx4_free_cmd_mailbox(dev, mailbox);
147 return ret;
148}
149EXPORT_SYMBOL_GPL(mlx4_qp_modify);
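The op[][] table above encodes the legal QP state machine sparsely: any (cur_state, new_state) pair left at zero is rejected before a command reaches the firmware. A reduced standalone sketch of the same lookup idiom (state names and opcode values here are illustrative stand-ins, not the real MLX4_CMD_* opcodes):

/* Sketch: sparse 2-D transition table, with 0 meaning "illegal". */
#include <stdio.h>

enum { RST, INIT, RTR, RTS, NUM_STATE };

int main(void)
{
	static const unsigned short op[NUM_STATE][NUM_STATE] = {
		[RST]  = { [INIT] = 0x19 },	/* stand-in opcode values */
		[INIT] = { [RTR]  = 0x1a },
		[RTR]  = { [RTS]  = 0x1b },
	};
	int cur, next;

	for (cur = RST; cur < NUM_STATE; ++cur)
		for (next = RST; next < NUM_STATE; ++next)
			if (op[cur][next])
				printf("%d -> %d: opcode 0x%x\n",
				       cur, next, op[cur][next]);
	return 0;
}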
150
151int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp)
152{
153 struct mlx4_priv *priv = mlx4_priv(dev);
154 struct mlx4_qp_table *qp_table = &priv->qp_table;
155 int err;
156
157 if (sqpn)
158 qp->qpn = sqpn;
159 else {
160 qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap);
161 if (qp->qpn == -1)
162 return -ENOMEM;
163 }
164
165 err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
166 if (err)
167 goto err_out;
168
169 err = mlx4_table_get(dev, &qp_table->auxc_table, qp->qpn);
170 if (err)
171 goto err_put_qp;
172
173 err = mlx4_table_get(dev, &qp_table->altc_table, qp->qpn);
174 if (err)
175 goto err_put_auxc;
176
177 err = mlx4_table_get(dev, &qp_table->rdmarc_table, qp->qpn);
178 if (err)
179 goto err_put_altc;
180
181 err = mlx4_table_get(dev, &qp_table->cmpt_table, qp->qpn);
182 if (err)
183 goto err_put_rdmarc;
184
185 spin_lock_irq(&qp_table->lock);
186 err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp);
187 spin_unlock_irq(&qp_table->lock);
188 if (err)
189 goto err_put_cmpt;
190
191 atomic_set(&qp->refcount, 1);
192 init_completion(&qp->free);
193
194 return 0;
195
196err_put_cmpt:
197 mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
198
199err_put_rdmarc:
200 mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
201
202err_put_altc:
203 mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
204
205err_put_auxc:
206 mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
207
208err_put_qp:
209 mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
210
211err_out:
212 if (!sqpn)
213 mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
214
215 return err;
216}
217EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
218
219void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
220{
221 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
222 unsigned long flags;
223
224 spin_lock_irqsave(&qp_table->lock, flags);
225 radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1));
226 spin_unlock_irqrestore(&qp_table->lock, flags);
227}
228EXPORT_SYMBOL_GPL(mlx4_qp_remove);
229
230void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
231{
232 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
233
234 if (atomic_dec_and_test(&qp->refcount))
235 complete(&qp->free);
236 wait_for_completion(&qp->free);
237
238 mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
239 mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
240 mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
241 mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
242 mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
243
244 mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
245}
246EXPORT_SYMBOL_GPL(mlx4_qp_free);
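mlx4_qp_free pairs with mlx4_qp_event through a refcount-plus-completion idiom: the destroyer drops its own reference, then sleeps until the last in-flight event handler's put fires the completion, so no handler can still touch the QP when its ICM entries are released. A minimal C11 sketch of the idiom (single-threaded for brevity; names are illustrative, not driver API):

/* Sketch: refcount + completion teardown.  atomic_fetch_sub stands in
 * for atomic_dec_and_test(); the flag stands in for struct completion. */
#include <stdatomic.h>
#include <stdio.h>

struct obj {
	atomic_int refcount;
	int done;			/* stands in for complete()/wait */
};

static void put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		o->done = 1;		/* last put signals the waiter */
}

int main(void)
{
	struct obj o = { .refcount = 2 };	/* creator + one in-flight event */

	put(&o);	/* event handler finishes */
	put(&o);	/* destroyer drops its own reference */
	printf("safe to free: %d\n", o.done);
	return 0;
}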
247
248static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
249{
250 return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
251 MLX4_CMD_TIME_CLASS_B);
252}
253
254int __devinit mlx4_init_qp_table(struct mlx4_dev *dev)
255{
256 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
257 int err;
258
259 spin_lock_init(&qp_table->lock);
260 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
261
262 /*
263 * We reserve 2 extra QPs per port for the special QPs. The
264 * block of special QPs must be aligned to a multiple of 8, so
265 * round up.
266 */
267 dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8);
268 err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
269 (1 << 24) - 1, dev->caps.sqp_start + 8);
270 if (err)
271 return err;
272
273 return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start);
274}
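The ALIGN above rounds the firmware's reserved-QP count up so the block of 8 special QPs starts on an 8-aligned QP number, as the comment requires. A small worked sketch, reproducing the kernel's ALIGN macro for a standalone build (the cap value is an illustrative assumption):

/* Sketch: placing the special-QP block on an 8-QP boundary. */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* power-of-two a */

int main(void)
{
	unsigned reserved_qps = 22;			/* illustrative cap */
	unsigned sqp_start = ALIGN(reserved_qps, 8);	/* -> 24 */

	/* special QPs occupy [sqp_start, sqp_start + 8) */
	printf("sqp_start = %u, first general QP = %u\n",
	       sqp_start, sqp_start + 8);
	return 0;
}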
275
276void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
277{
278 mlx4_CONF_SPECIAL_QP(dev, 0);
279 mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
280}
diff --git a/drivers/net/mlx4/reset.c b/drivers/net/mlx4/reset.c
new file mode 100644
index 000000000000..51eef8492e93
--- /dev/null
+++ b/drivers/net/mlx4/reset.c
@@ -0,0 +1,181 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/init.h>
34#include <linux/errno.h>
35#include <linux/pci.h>
36#include <linux/delay.h>
37#include <linux/slab.h>
38
39#include "mlx4.h"
40
41int mlx4_reset(struct mlx4_dev *dev)
42{
43 void __iomem *reset;
44 u32 *hca_header = NULL;
45 int pcie_cap;
46 u16 devctl;
47 u16 linkctl;
48 u16 vendor;
49 unsigned long end;
50 u32 sem;
51 int i;
52 int err = 0;
53
54#define MLX4_RESET_BASE 0xf0000
55#define MLX4_RESET_SIZE 0x400
56#define MLX4_SEM_OFFSET 0x3fc
57#define MLX4_RESET_OFFSET 0x10
58#define MLX4_RESET_VALUE swab32(1)
59
60#define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ)
61#define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ)
62
63 /*
64 * Reset the chip. This is somewhat ugly because we have to
65 * save off the PCI header before reset and then restore it
66 * after the chip reboots. We skip config space dwords 22
67 * and 23 since those have a special meaning.
68 */
69
70 /* Do we need to save off the full 4K PCI Express header?? */
71 hca_header = kmalloc(256, GFP_KERNEL);
72 if (!hca_header) {
73 err = -ENOMEM;
74 mlx4_err(dev, "Couldn't allocate memory to save HCA "
75 "PCI header, aborting.\n");
76 goto out;
77 }
78
79 pcie_cap = pci_find_capability(dev->pdev, PCI_CAP_ID_EXP);
80
81 for (i = 0; i < 64; ++i) {
82 if (i == 22 || i == 23)
83 continue;
84 if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) {
85 err = -ENODEV;
86 mlx4_err(dev, "Couldn't save HCA "
87 "PCI header, aborting.\n");
88 goto out;
89 }
90 }
91
92 reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE,
93 MLX4_RESET_SIZE);
94 if (!reset) {
95 err = -ENOMEM;
96 mlx4_err(dev, "Couldn't map HCA reset register, aborting.\n");
97 goto out;
98 }
99
100 /* grab HW semaphore to lock out flash updates */
101 end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES;
102 do {
103 sem = readl(reset + MLX4_SEM_OFFSET);
104 if (!sem)
105 break;
106
107 msleep(1);
108 } while (time_before(jiffies, end));
109
110 if (sem) {
111 mlx4_err(dev, "Failed to obtain HW semaphore, aborting\n");
112 err = -EAGAIN;
113 iounmap(reset);
114 goto out;
115 }
116
117 /* actually hit reset */
118 writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET);
119 iounmap(reset);
120
121 end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
122 do {
123 if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) &&
124 vendor != 0xffff)
125 break;
126
127 msleep(1);
128 } while (time_before(jiffies, end));
129
130 if (vendor == 0xffff) {
131 err = -ENODEV;
132 mlx4_err(dev, "PCI device did not come back after reset, "
133 "aborting.\n");
134 goto out;
135 }
136
137 /* Now restore the PCI headers */
138 if (pcie_cap) {
139 devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
140 if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL,
141 devctl)) {
142 err = -ENODEV;
143 mlx4_err(dev, "Couldn't restore HCA PCI Express "
144 "Device Control register, aborting.\n");
145 goto out;
146 }
147 linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
148 if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL,
149 linkctl)) {
150 err = -ENODEV;
151 mlx4_err(dev, "Couldn't restore HCA PCI Express "
152 "Link control register, aborting.\n");
153 goto out;
154 }
155 }
156
157 for (i = 0; i < 16; ++i) {
158 if (i * 4 == PCI_COMMAND)
159 continue;
160
161 if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) {
162 err = -ENODEV;
163 mlx4_err(dev, "Couldn't restore HCA reg %x, "
164 "aborting.\n", i);
165 goto out;
166 }
167 }
168
169 if (pci_write_config_dword(dev->pdev, PCI_COMMAND,
170 hca_header[PCI_COMMAND / 4])) {
171 err = -ENODEV;
172 mlx4_err(dev, "Couldn't restore HCA COMMAND, "
173 "aborting.\n");
174 goto out;
175 }
176
177out:
178 kfree(hca_header);
179
180 return err;
181}
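Both waits in mlx4_reset — grabbing the flash semaphore and waiting for the device to reappear on the bus — follow the same poll-until-deadline pattern. A standalone sketch of that pattern (time()/usleep() stand in for jiffies/msleep(), and the hardware read is stubbed):

/* Sketch: bounded polling loop.  hw_busy() is a stub that clears
 * after a few polls; a real caller would read device state. */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int hw_busy(void)
{
	static int polls;

	return ++polls < 4;		/* pretend the busy bit clears */
}

int main(void)
{
	time_t end = time(NULL) + 10;	/* mirrors the 10 s semaphore timeout */
	int busy;

	do {
		busy = hw_busy();
		if (!busy)
			break;
		usleep(1000);		/* the driver uses msleep(1) */
	} while (time(NULL) < end);

	printf(busy ? "timed out\n" : "semaphore acquired\n");
	return 0;
}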
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
new file mode 100644
index 000000000000..2134f83aed87
--- /dev/null
+++ b/drivers/net/mlx4/srq.c
@@ -0,0 +1,227 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/init.h>
34
35#include <linux/mlx4/cmd.h>
36
37#include "mlx4.h"
38#include "icm.h"
39
40struct mlx4_srq_context {
41 __be32 state_logsize_srqn;
42 u8 logstride;
43 u8 reserved1[3];
44 u8 pg_offset;
45 u8 reserved2[3];
46 u32 reserved3;
47 u8 log_page_size;
48 u8 reserved4[2];
49 u8 mtt_base_addr_h;
50 __be32 mtt_base_addr_l;
51 __be32 pd;
52 __be16 limit_watermark;
53 __be16 wqe_cnt;
54 u16 reserved5;
55 __be16 wqe_counter;
56 u32 reserved6;
57 __be64 db_rec_addr;
58};
59
60void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
61{
62 struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
63 struct mlx4_srq *srq;
64
65 spin_lock(&srq_table->lock);
66
67 srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
68 if (srq)
69 atomic_inc(&srq->refcount);
70
71 spin_unlock(&srq_table->lock);
72
73 if (!srq) {
74 mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
75 return;
76 }
77
78 srq->event(srq, event_type);
79
80 if (atomic_dec_and_test(&srq->refcount))
81 complete(&srq->free);
82}
83
84static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
85 int srq_num)
86{
87 return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
88 MLX4_CMD_TIME_CLASS_A);
89}
90
91static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
92 int srq_num)
93{
94 return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
95 mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
96 MLX4_CMD_TIME_CLASS_A);
97}
98
99static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
100{
101 return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
102 MLX4_CMD_TIME_CLASS_B);
103}
104
105int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
106 u64 db_rec, struct mlx4_srq *srq)
107{
108 struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
109 struct mlx4_cmd_mailbox *mailbox;
110 struct mlx4_srq_context *srq_context;
111 u64 mtt_addr;
112 int err;
113
114 srq->srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
115 if (srq->srqn == -1)
116 return -ENOMEM;
117
118 err = mlx4_table_get(dev, &srq_table->table, srq->srqn);
119 if (err)
120 goto err_out;
121
122 err = mlx4_table_get(dev, &srq_table->cmpt_table, srq->srqn);
123 if (err)
124 goto err_put;
125
126 spin_lock_irq(&srq_table->lock);
127 err = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
128 spin_unlock_irq(&srq_table->lock);
129 if (err)
130 goto err_cmpt_put;
131
132 mailbox = mlx4_alloc_cmd_mailbox(dev);
133 if (IS_ERR(mailbox)) {
134 err = PTR_ERR(mailbox);
135 goto err_radix;
136 }
137
138 srq_context = mailbox->buf;
139 memset(srq_context, 0, sizeof *srq_context);
140
141 srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
142 srq->srqn);
143 srq_context->logstride = srq->wqe_shift - 4;
144 srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
145
146 mtt_addr = mlx4_mtt_addr(dev, mtt);
147 srq_context->mtt_base_addr_h = mtt_addr >> 32;
148 srq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
149 srq_context->pd = cpu_to_be32(pdn);
150 srq_context->db_rec_addr = cpu_to_be64(db_rec);
151
152 err = mlx4_SW2HW_SRQ(dev, mailbox, srq->srqn);
153 mlx4_free_cmd_mailbox(dev, mailbox);
154 if (err)
155 goto err_radix;
156
157 atomic_set(&srq->refcount, 1);
158 init_completion(&srq->free);
159
160 return 0;
161
162err_radix:
163 spin_lock_irq(&srq_table->lock);
164 radix_tree_delete(&srq_table->tree, srq->srqn);
165 spin_unlock_irq(&srq_table->lock);
166
167err_cmpt_put:
168 mlx4_table_put(dev, &srq_table->cmpt_table, srq->srqn);
169
170err_put:
171 mlx4_table_put(dev, &srq_table->table, srq->srqn);
172
173err_out:
174 mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
175
176 return err;
177}
178EXPORT_SYMBOL_GPL(mlx4_srq_alloc);
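mlx4_srq_alloc builds the context's first word by shifting log2 of the SRQ size into the bits above the 24-bit SRQ number. A worked sketch of that packing (values are illustrative, and the SRQ number is assumed to fit in the low 24 bits, matching the masking used elsewhere in the driver):

/* Sketch: state_logsize_srqn packing, before the cpu_to_be32 swap. */
#include <stdio.h>

static int ilog2(unsigned int v)	/* like the kernel's ilog2() for v > 0 */
{
	int l = -1;

	while (v) {
		v >>= 1;
		++l;
	}
	return l;
}

int main(void)
{
	unsigned int max_wqes = 256;	/* SRQ size, a power of two */
	unsigned int srqn = 0x000042;	/* assumed < 2^24 */
	unsigned int word = (ilog2(max_wqes) << 24) | srqn;

	printf("state_logsize_srqn = 0x%08x\n", word);	/* 0x08000042 */
	return 0;
}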
179
180void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
181{
182 struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
183 int err;
184
185 err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn);
186 if (err)
187 mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn);
188
189 spin_lock_irq(&srq_table->lock);
190 radix_tree_delete(&srq_table->tree, srq->srqn);
191 spin_unlock_irq(&srq_table->lock);
192
193 if (atomic_dec_and_test(&srq->refcount))
194 complete(&srq->free);
195 wait_for_completion(&srq->free);
196
197 mlx4_table_put(dev, &srq_table->table, srq->srqn);
198 mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
199}
200EXPORT_SYMBOL_GPL(mlx4_srq_free);
201
202int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark)
203{
204 return mlx4_ARM_SRQ(dev, srq->srqn, limit_watermark);
205}
206EXPORT_SYMBOL_GPL(mlx4_srq_arm);
207
208int __devinit mlx4_init_srq_table(struct mlx4_dev *dev)
209{
210 struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
211 int err;
212
213 spin_lock_init(&srq_table->lock);
214 INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
215
216 err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
217 dev->caps.num_srqs - 1, dev->caps.reserved_srqs);
218 if (err)
219 return err;
220
221 return 0;
222}
223
224void mlx4_cleanup_srq_table(struct mlx4_dev *dev)
225{
226 mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap);
227}