aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Stefan Roscher <stefan.roscher at de.ibm.com> 2007-07-20 10:04:17 -0400
committer: Roland Dreier <rolandd@cisco.com> 2007-07-21 00:19:47 -0400
commit: e2f81daf23efde23d8cac1fc253d41838f0347cf (patch)
tree: 310c493f007cc10625426118eaf217e50dd978f3
parent: 0c10f7b79b5bb07a37aa5927072abdc3f45ac8d3 (diff)
IB/ehca: Support small QP queues
eHCA2 supports QP queues that can be as small as 512 bytes. This greatly reduces memory overhead for consumers that use lots of QPs with small queues (e.g. RDMA-only QPs).

Apart from dealing with firmware, this code needs to manage bite-sized chunks of kernel pages, making sure that no kernel page is shared between different protection domains.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h 41
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_cq.c 8
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_eq.c 8
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_main.c 14
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_pd.c 25
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qp.c 161
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_uverbs.c 2
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_if.c 30
-rw-r--r-- drivers/infiniband/hw/ehca/ipz_pt_fn.c 222
-rw-r--r-- drivers/infiniband/hw/ehca/ipz_pt_fn.h 26
10 files changed, 378 insertions, 159 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 63b8b9f7c4fc..3725aa8664d9 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -43,7 +43,6 @@
43#ifndef __EHCA_CLASSES_H__ 43#ifndef __EHCA_CLASSES_H__
44#define __EHCA_CLASSES_H__ 44#define __EHCA_CLASSES_H__
45 45
46
47struct ehca_module; 46struct ehca_module;
48struct ehca_qp; 47struct ehca_qp;
49struct ehca_cq; 48struct ehca_cq;
@@ -129,6 +128,10 @@ struct ehca_pd {
129 struct ib_pd ib_pd; 128 struct ib_pd ib_pd;
130 struct ipz_pd fw_pd; 129 struct ipz_pd fw_pd;
131 u32 ownpid; 130 u32 ownpid;
131 /* small queue mgmt */
132 struct mutex lock;
133 struct list_head free[2];
134 struct list_head full[2];
132}; 135};
133 136
134enum ehca_ext_qp_type { 137enum ehca_ext_qp_type {
@@ -307,6 +310,8 @@ int ehca_init_av_cache(void);
307void ehca_cleanup_av_cache(void); 310void ehca_cleanup_av_cache(void);
308int ehca_init_mrmw_cache(void); 311int ehca_init_mrmw_cache(void);
309void ehca_cleanup_mrmw_cache(void); 312void ehca_cleanup_mrmw_cache(void);
313int ehca_init_small_qp_cache(void);
314void ehca_cleanup_small_qp_cache(void);
310 315
311extern rwlock_t ehca_qp_idr_lock; 316extern rwlock_t ehca_qp_idr_lock;
312extern rwlock_t ehca_cq_idr_lock; 317extern rwlock_t ehca_cq_idr_lock;
@@ -324,7 +329,7 @@ struct ipzu_queue_resp {
324 u32 queue_length; /* queue length allocated in bytes */ 329 u32 queue_length; /* queue length allocated in bytes */
325 u32 pagesize; 330 u32 pagesize;
326 u32 toggle_state; 331 u32 toggle_state;
327 u32 dummy; /* padding for 8 byte alignment */ 332 u32 offset; /* save offset within a page for small_qp */
328}; 333};
329 334
330struct ehca_create_cq_resp { 335struct ehca_create_cq_resp {
@@ -366,15 +371,29 @@ enum ehca_ll_comp_flags {
366 LLQP_COMP_MASK = 0x60, 371 LLQP_COMP_MASK = 0x60,
367}; 372};
368 373
374struct ehca_alloc_queue_parms {
375 /* input parameters */
376 int max_wr;
377 int max_sge;
378 int page_size;
379 int is_small;
380
381 /* output parameters */
382 u16 act_nr_wqes;
383 u8 act_nr_sges;
384 u32 queue_size; /* bytes for small queues, pages otherwise */
385};
386
369struct ehca_alloc_qp_parms { 387struct ehca_alloc_qp_parms {
370/* input parameters */ 388 struct ehca_alloc_queue_parms squeue;
389 struct ehca_alloc_queue_parms rqueue;
390
391 /* input parameters */
371 enum ehca_service_type servicetype; 392 enum ehca_service_type servicetype;
393 int qp_storage;
372 int sigtype; 394 int sigtype;
373 enum ehca_ext_qp_type ext_type; 395 enum ehca_ext_qp_type ext_type;
374 enum ehca_ll_comp_flags ll_comp_flags; 396 enum ehca_ll_comp_flags ll_comp_flags;
375
376 int max_send_wr, max_recv_wr;
377 int max_send_sge, max_recv_sge;
378 int ud_av_l_key_ctl; 397 int ud_av_l_key_ctl;
379 398
380 u32 token; 399 u32 token;
@@ -384,18 +403,10 @@ struct ehca_alloc_qp_parms {
384 403
385 u32 srq_qpn, srq_token, srq_limit; 404 u32 srq_qpn, srq_token, srq_limit;
386 405
387/* output parameters */ 406 /* output parameters */
388 u32 real_qp_num; 407 u32 real_qp_num;
389 struct ipz_qp_handle qp_handle; 408 struct ipz_qp_handle qp_handle;
390 struct h_galpas galpas; 409 struct h_galpas galpas;
391
392 u16 act_nr_send_wqes;
393 u16 act_nr_recv_wqes;
394 u8 act_nr_recv_sges;
395 u8 act_nr_send_sges;
396
397 u32 nr_rq_pages;
398 u32 nr_sq_pages;
399}; 410};
400 411
401int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); 412int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 1e8ca3fca4aa..81aff36101ba 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -190,8 +190,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
190 goto create_cq_exit2; 190 goto create_cq_exit2;
191 } 191 }
192 192
193 ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages, 193 ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
194 EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0); 194 EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
195 if (!ipz_rc) { 195 if (!ipz_rc) {
196 ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", 196 ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
197 ipz_rc, device); 197 ipz_rc, device);
@@ -285,7 +285,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
285 return cq; 285 return cq;
286 286
287create_cq_exit4: 287create_cq_exit4:
288 ipz_queue_dtor(&my_cq->ipz_queue); 288 ipz_queue_dtor(NULL, &my_cq->ipz_queue);
289 289
290create_cq_exit3: 290create_cq_exit3:
291 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); 291 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
@@ -359,7 +359,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
359 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); 359 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
360 return ehca2ib_return_code(h_ret); 360 return ehca2ib_return_code(h_ret);
361 } 361 }
362 ipz_queue_dtor(&my_cq->ipz_queue); 362 ipz_queue_dtor(NULL, &my_cq->ipz_queue);
363 kmem_cache_free(cq_cache, my_cq); 363 kmem_cache_free(cq_cache, my_cq);
364 364
365 return 0; 365 return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 4825975f88cf..1d41faa7a337 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -86,8 +86,8 @@ int ehca_create_eq(struct ehca_shca *shca,
86 return -EINVAL; 86 return -EINVAL;
87 } 87 }
88 88
89 ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages, 89 ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
90 EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0); 90 EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
91 if (!ret) { 91 if (!ret) {
92 ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); 92 ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
93 goto create_eq_exit1; 93 goto create_eq_exit1;
@@ -145,7 +145,7 @@ int ehca_create_eq(struct ehca_shca *shca,
145 return 0; 145 return 0;
146 146
147create_eq_exit2: 147create_eq_exit2:
148 ipz_queue_dtor(&eq->ipz_queue); 148 ipz_queue_dtor(NULL, &eq->ipz_queue);
149 149
150create_eq_exit1: 150create_eq_exit1:
151 hipz_h_destroy_eq(shca->ipz_hca_handle, eq); 151 hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
@@ -181,7 +181,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
181 ehca_err(&shca->ib_device, "Can't free EQ resources."); 181 ehca_err(&shca->ib_device, "Can't free EQ resources.");
182 return -EINVAL; 182 return -EINVAL;
183 } 183 }
184 ipz_queue_dtor(&eq->ipz_queue); 184 ipz_queue_dtor(NULL, &eq->ipz_queue);
185 185
186 return 0; 186 return 0;
187} 187}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index bb104b7f73e3..99036b65bb84 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -181,6 +181,12 @@ static int ehca_create_slab_caches(void)
181 goto create_slab_caches5; 181 goto create_slab_caches5;
182 } 182 }
183 183
184 ret = ehca_init_small_qp_cache();
185 if (ret) {
186 ehca_gen_err("Cannot create small queue SLAB cache.");
187 goto create_slab_caches6;
188 }
189
184#ifdef CONFIG_PPC_64K_PAGES 190#ifdef CONFIG_PPC_64K_PAGES
185 ctblk_cache = kmem_cache_create("ehca_cache_ctblk", 191 ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
186 EHCA_PAGESIZE, H_CB_ALIGNMENT, 192 EHCA_PAGESIZE, H_CB_ALIGNMENT,
@@ -188,12 +194,15 @@ static int ehca_create_slab_caches(void)
188 NULL); 194 NULL);
189 if (!ctblk_cache) { 195 if (!ctblk_cache) {
190 ehca_gen_err("Cannot create ctblk SLAB cache."); 196 ehca_gen_err("Cannot create ctblk SLAB cache.");
191 ehca_cleanup_mrmw_cache(); 197 ehca_cleanup_small_qp_cache();
192 goto create_slab_caches5; 198 goto create_slab_caches6;
193 } 199 }
194#endif 200#endif
195 return 0; 201 return 0;
196 202
203create_slab_caches6:
204 ehca_cleanup_mrmw_cache();
205
197create_slab_caches5: 206create_slab_caches5:
198 ehca_cleanup_av_cache(); 207 ehca_cleanup_av_cache();
199 208
@@ -211,6 +220,7 @@ create_slab_caches2:
211 220
212static void ehca_destroy_slab_caches(void) 221static void ehca_destroy_slab_caches(void)
213{ 222{
223 ehca_cleanup_small_qp_cache();
214 ehca_cleanup_mrmw_cache(); 224 ehca_cleanup_mrmw_cache();
215 ehca_cleanup_av_cache(); 225 ehca_cleanup_av_cache();
216 ehca_cleanup_qp_cache(); 226 ehca_cleanup_qp_cache();
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index c85312ad292b..3dafd7ff36cd 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -49,6 +49,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
49 struct ib_ucontext *context, struct ib_udata *udata) 49 struct ib_ucontext *context, struct ib_udata *udata)
50{ 50{
51 struct ehca_pd *pd; 51 struct ehca_pd *pd;
52 int i;
52 53
53 pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); 54 pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
54 if (!pd) { 55 if (!pd) {
@@ -58,6 +59,11 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
58 } 59 }
59 60
60 pd->ownpid = current->tgid; 61 pd->ownpid = current->tgid;
62 for (i = 0; i < 2; i++) {
63 INIT_LIST_HEAD(&pd->free[i]);
64 INIT_LIST_HEAD(&pd->full[i]);
65 }
66 mutex_init(&pd->lock);
61 67
62 /* 68 /*
63 * Kernel PD: when device = -1, 0 69 * Kernel PD: when device = -1, 0
@@ -81,6 +87,9 @@ int ehca_dealloc_pd(struct ib_pd *pd)
81{ 87{
82 u32 cur_pid = current->tgid; 88 u32 cur_pid = current->tgid;
83 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); 89 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
90 int i, leftovers = 0;
91 extern struct kmem_cache *small_qp_cache;
92 struct ipz_small_queue_page *page, *tmp;
84 93
85 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && 94 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
86 my_pd->ownpid != cur_pid) { 95 my_pd->ownpid != cur_pid) {
@@ -89,8 +98,20 @@ int ehca_dealloc_pd(struct ib_pd *pd)
89 return -EINVAL; 98 return -EINVAL;
90 } 99 }
91 100
92 kmem_cache_free(pd_cache, 101 for (i = 0; i < 2; i++) {
93 container_of(pd, struct ehca_pd, ib_pd)); 102 list_splice(&my_pd->full[i], &my_pd->free[i]);
103 list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
104 leftovers = 1;
105 free_page(page->page);
106 kmem_cache_free(small_qp_cache, page);
107 }
108 }
109
110 if (leftovers)
111 ehca_warn(pd->device,
112 "Some small queue pages were not freed");
113
114 kmem_cache_free(pd_cache, my_pd);
94 115
95 return 0; 116 return 0;
96} 117}
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 483f0ca1acc4..b178cba96345 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -275,34 +275,39 @@ static inline void queue2resp(struct ipzu_queue_resp *resp,
275 resp->toggle_state = queue->toggle_state; 275 resp->toggle_state = queue->toggle_state;
276} 276}
277 277
278static inline int ll_qp_msg_size(int nr_sge)
279{
280 return 128 << nr_sge;
281}
282
283/* 278/*
284 * init_qp_queue initializes/constructs r/squeue and registers queue pages. 279 * init_qp_queue initializes/constructs r/squeue and registers queue pages.
285 */ 280 */
286static inline int init_qp_queue(struct ehca_shca *shca, 281static inline int init_qp_queue(struct ehca_shca *shca,
282 struct ehca_pd *pd,
287 struct ehca_qp *my_qp, 283 struct ehca_qp *my_qp,
288 struct ipz_queue *queue, 284 struct ipz_queue *queue,
289 int q_type, 285 int q_type,
290 u64 expected_hret, 286 u64 expected_hret,
291 int nr_q_pages, 287 struct ehca_alloc_queue_parms *parms,
292 int wqe_size, 288 int wqe_size)
293 int nr_sges)
294{ 289{
295 int ret, cnt, ipz_rc; 290 int ret, cnt, ipz_rc, nr_q_pages;
296 void *vpage; 291 void *vpage;
297 u64 rpage, h_ret; 292 u64 rpage, h_ret;
298 struct ib_device *ib_dev = &shca->ib_device; 293 struct ib_device *ib_dev = &shca->ib_device;
299 struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; 294 struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
300 295
301 if (!nr_q_pages) 296 if (!parms->queue_size)
302 return 0; 297 return 0;
303 298
304 ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE, 299 if (parms->is_small) {
305 wqe_size, nr_sges); 300 nr_q_pages = 1;
301 ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
302 128 << parms->page_size,
303 wqe_size, parms->act_nr_sges, 1);
304 } else {
305 nr_q_pages = parms->queue_size;
306 ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
307 EHCA_PAGESIZE, wqe_size,
308 parms->act_nr_sges, 0);
309 }
310
306 if (!ipz_rc) { 311 if (!ipz_rc) {
307 ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x", 312 ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
308 ipz_rc); 313 ipz_rc);
@@ -323,7 +328,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
323 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, 328 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
324 my_qp->ipz_qp_handle, 329 my_qp->ipz_qp_handle,
325 NULL, 0, q_type, 330 NULL, 0, q_type,
326 rpage, 1, 331 rpage, parms->is_small ? 0 : 1,
327 my_qp->galpas.kernel); 332 my_qp->galpas.kernel);
328 if (cnt == (nr_q_pages - 1)) { /* last page! */ 333 if (cnt == (nr_q_pages - 1)) { /* last page! */
329 if (h_ret != expected_hret) { 334 if (h_ret != expected_hret) {
@@ -354,10 +359,45 @@ static inline int init_qp_queue(struct ehca_shca *shca,
354 return 0; 359 return 0;
355 360
356init_qp_queue1: 361init_qp_queue1:
357 ipz_queue_dtor(queue); 362 ipz_queue_dtor(pd, queue);
358 return ret; 363 return ret;
359} 364}
360 365
366static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
367{
368 if (is_llqp)
369 return 128 << act_nr_sge;
370 else
371 return offsetof(struct ehca_wqe,
372 u.nud.sg_list[act_nr_sge]);
373}
374
375static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
376 int req_nr_sge, int is_llqp)
377{
378 u32 wqe_size, q_size;
379 int act_nr_sge = req_nr_sge;
380
381 if (!is_llqp)
382 /* round up #SGEs so WQE size is a power of 2 */
383 for (act_nr_sge = 4; act_nr_sge <= 252;
384 act_nr_sge = 4 + 2 * act_nr_sge)
385 if (act_nr_sge >= req_nr_sge)
386 break;
387
388 wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
389 q_size = wqe_size * (queue->max_wr + 1);
390
391 if (q_size <= 512)
392 queue->page_size = 2;
393 else if (q_size <= 1024)
394 queue->page_size = 3;
395 else
396 queue->page_size = 0;
397
398 queue->is_small = (queue->page_size != 0);
399}
400
361/* 401/*
362 * Create an ib_qp struct that is either a QP or an SRQ, depending on 402 * Create an ib_qp struct that is either a QP or an SRQ, depending on
363 * the value of the is_srq parameter. If init_attr and srq_init_attr share 403 * the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -553,10 +593,20 @@ static struct ehca_qp *internal_create_qp(
553 if (my_qp->recv_cq) 593 if (my_qp->recv_cq)
554 parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; 594 parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
555 595
556 parms.max_send_wr = init_attr->cap.max_send_wr; 596 parms.squeue.max_wr = init_attr->cap.max_send_wr;
557 parms.max_recv_wr = init_attr->cap.max_recv_wr; 597 parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
558 parms.max_send_sge = max_send_sge; 598 parms.squeue.max_sge = max_send_sge;
559 parms.max_recv_sge = max_recv_sge; 599 parms.rqueue.max_sge = max_recv_sge;
600
601 if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)
602 && !(context && udata)) { /* no small QP support in userspace ATM */
603 ehca_determine_small_queue(
604 &parms.squeue, max_send_sge, is_llqp);
605 ehca_determine_small_queue(
606 &parms.rqueue, max_recv_sge, is_llqp);
607 parms.qp_storage =
608 (parms.squeue.is_small || parms.rqueue.is_small);
609 }
560 610
561 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); 611 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
562 if (h_ret != H_SUCCESS) { 612 if (h_ret != H_SUCCESS) {
@@ -570,50 +620,33 @@ static struct ehca_qp *internal_create_qp(
570 my_qp->ipz_qp_handle = parms.qp_handle; 620 my_qp->ipz_qp_handle = parms.qp_handle;
571 my_qp->galpas = parms.galpas; 621 my_qp->galpas = parms.galpas;
572 622
623 swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
624 rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
625
573 switch (qp_type) { 626 switch (qp_type) {
574 case IB_QPT_RC: 627 case IB_QPT_RC:
575 if (!is_llqp) { 628 if (is_llqp) {
576 swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ 629 parms.squeue.act_nr_sges = 1;
577 (parms.act_nr_send_sges)]); 630 parms.rqueue.act_nr_sges = 1;
578 rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
579 (parms.act_nr_recv_sges)]);
580 } else { /* for LLQP we need to use msg size, not wqe size */
581 swqe_size = ll_qp_msg_size(max_send_sge);
582 rwqe_size = ll_qp_msg_size(max_recv_sge);
583 parms.act_nr_send_sges = 1;
584 parms.act_nr_recv_sges = 1;
585 } 631 }
586 break; 632 break;
587 case IB_QPT_UC:
588 swqe_size = offsetof(struct ehca_wqe,
589 u.nud.sg_list[parms.act_nr_send_sges]);
590 rwqe_size = offsetof(struct ehca_wqe,
591 u.nud.sg_list[parms.act_nr_recv_sges]);
592 break;
593
594 case IB_QPT_UD: 633 case IB_QPT_UD:
595 case IB_QPT_GSI: 634 case IB_QPT_GSI:
596 case IB_QPT_SMI: 635 case IB_QPT_SMI:
636 /* UD circumvention */
597 if (is_llqp) { 637 if (is_llqp) {
598 swqe_size = ll_qp_msg_size(parms.act_nr_send_sges); 638 parms.squeue.act_nr_sges = 1;
599 rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges); 639 parms.rqueue.act_nr_sges = 1;
600 parms.act_nr_send_sges = 1;
601 parms.act_nr_recv_sges = 1;
602 } else { 640 } else {
603 /* UD circumvention */ 641 parms.squeue.act_nr_sges -= 2;
604 parms.act_nr_send_sges -= 2; 642 parms.rqueue.act_nr_sges -= 2;
605 parms.act_nr_recv_sges -= 2;
606 swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[
607 parms.act_nr_send_sges]);
608 rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[
609 parms.act_nr_recv_sges]);
610 } 643 }
611 644
612 if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { 645 if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
613 parms.act_nr_send_wqes = init_attr->cap.max_send_wr; 646 parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
614 parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; 647 parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
615 parms.act_nr_send_sges = init_attr->cap.max_send_sge; 648 parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
616 parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; 649 parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
617 ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; 650 ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
618 } 651 }
619 652
@@ -626,10 +659,9 @@ static struct ehca_qp *internal_create_qp(
626 /* initialize r/squeue and register queue pages */ 659 /* initialize r/squeue and register queue pages */
627 if (HAS_SQ(my_qp)) { 660 if (HAS_SQ(my_qp)) {
628 ret = init_qp_queue( 661 ret = init_qp_queue(
629 shca, my_qp, &my_qp->ipz_squeue, 0, 662 shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
630 HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, 663 HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
631 parms.nr_sq_pages, swqe_size, 664 &parms.squeue, swqe_size);
632 parms.act_nr_send_sges);
633 if (ret) { 665 if (ret) {
634 ehca_err(pd->device, "Couldn't initialize squeue " 666 ehca_err(pd->device, "Couldn't initialize squeue "
635 "and pages ret=%x", ret); 667 "and pages ret=%x", ret);
@@ -639,9 +671,8 @@ static struct ehca_qp *internal_create_qp(
639 671
640 if (HAS_RQ(my_qp)) { 672 if (HAS_RQ(my_qp)) {
641 ret = init_qp_queue( 673 ret = init_qp_queue(
642 shca, my_qp, &my_qp->ipz_rqueue, 1, 674 shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
643 H_SUCCESS, parms.nr_rq_pages, rwqe_size, 675 H_SUCCESS, &parms.rqueue, rwqe_size);
644 parms.act_nr_recv_sges);
645 if (ret) { 676 if (ret) {
646 ehca_err(pd->device, "Couldn't initialize rqueue " 677 ehca_err(pd->device, "Couldn't initialize rqueue "
647 "and pages ret=%x", ret); 678 "and pages ret=%x", ret);
@@ -671,10 +702,10 @@ static struct ehca_qp *internal_create_qp(
671 } 702 }
672 703
673 init_attr->cap.max_inline_data = 0; /* not supported yet */ 704 init_attr->cap.max_inline_data = 0; /* not supported yet */
674 init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; 705 init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
675 init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; 706 init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
676 init_attr->cap.max_send_sge = parms.act_nr_send_sges; 707 init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
677 init_attr->cap.max_send_wr = parms.act_nr_send_wqes; 708 init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
678 my_qp->init_attr = *init_attr; 709 my_qp->init_attr = *init_attr;
679 710
680 /* NOTE: define_apq0() not supported yet */ 711 /* NOTE: define_apq0() not supported yet */
@@ -708,6 +739,8 @@ static struct ehca_qp *internal_create_qp(
708 resp.ext_type = my_qp->ext_type; 739 resp.ext_type = my_qp->ext_type;
709 resp.qkey = my_qp->qkey; 740 resp.qkey = my_qp->qkey;
710 resp.real_qp_num = my_qp->real_qp_num; 741 resp.real_qp_num = my_qp->real_qp_num;
742 resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset;
743 resp.ipz_squeue.offset = my_qp->ipz_squeue.offset;
711 if (HAS_SQ(my_qp)) 744 if (HAS_SQ(my_qp))
712 queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); 745 queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
713 if (HAS_RQ(my_qp)) 746 if (HAS_RQ(my_qp))
@@ -724,11 +757,11 @@ static struct ehca_qp *internal_create_qp(
724 757
725create_qp_exit4: 758create_qp_exit4:
726 if (HAS_RQ(my_qp)) 759 if (HAS_RQ(my_qp))
727 ipz_queue_dtor(&my_qp->ipz_rqueue); 760 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
728 761
729create_qp_exit3: 762create_qp_exit3:
730 if (HAS_SQ(my_qp)) 763 if (HAS_SQ(my_qp))
731 ipz_queue_dtor(&my_qp->ipz_squeue); 764 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
732 765
733create_qp_exit2: 766create_qp_exit2:
734 hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); 767 hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
@@ -1735,9 +1768,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1735 } 1768 }
1736 1769
1737 if (HAS_RQ(my_qp)) 1770 if (HAS_RQ(my_qp))
1738 ipz_queue_dtor(&my_qp->ipz_rqueue); 1771 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
1739 if (HAS_SQ(my_qp)) 1772 if (HAS_SQ(my_qp))
1740 ipz_queue_dtor(&my_qp->ipz_squeue); 1773 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
1741 kmem_cache_free(qp_cache, my_qp); 1774 kmem_cache_free(qp_cache, my_qp);
1742 return 0; 1775 return 0;
1743} 1776}
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 05c415744e3b..4bc687fdf531 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -149,7 +149,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
149 ehca_gen_err("vm_insert_page() failed rc=%x", ret); 149 ehca_gen_err("vm_insert_page() failed rc=%x", ret);
150 return ret; 150 return ret;
151 } 151 }
152 start += PAGE_SIZE; 152 start += PAGE_SIZE;
153 } 153 }
154 vma->vm_private_data = mm_count; 154 vma->vm_private_data = mm_count;
155 (*mm_count)++; 155 (*mm_count)++;
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 358796ccf008..fdbfebea7d11 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -52,10 +52,13 @@
52#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) 52#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11)
53#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) 53#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12)
54#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) 54#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15)
55#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17)
55#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) 56#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18)
56#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) 57#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21)
57#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) 58#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23)
58#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) 59#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31)
60#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
61#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
59#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) 62#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63)
60 63
61#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) 64#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15)
@@ -299,6 +302,11 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
299 | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) 302 | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
300 | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) 303 | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
301 | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) 304 | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
305 | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
306 | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
307 parms->squeue.page_size)
308 | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
309 parms->rqueue.page_size)
302 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, 310 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
303 !!(parms->ll_comp_flags & LLQP_RECV_COMP)) 311 !!(parms->ll_comp_flags & LLQP_RECV_COMP))
304 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, 312 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
@@ -309,13 +317,13 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
309 317
310 max_r10_reg = 318 max_r10_reg =
311 EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, 319 EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
312 parms->max_send_wr + 1) 320 parms->squeue.max_wr + 1)
313 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, 321 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
314 parms->max_recv_wr + 1) 322 parms->rqueue.max_wr + 1)
315 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, 323 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
316 parms->max_send_sge) 324 parms->squeue.max_sge)
317 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, 325 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
318 parms->max_recv_sge); 326 parms->rqueue.max_sge);
319 327
320 r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); 328 r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
321 329
@@ -335,17 +343,17 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
335 343
336 parms->qp_handle.handle = outs[0]; 344 parms->qp_handle.handle = outs[0];
337 parms->real_qp_num = (u32)outs[1]; 345 parms->real_qp_num = (u32)outs[1];
338 parms->act_nr_send_wqes = 346 parms->squeue.act_nr_wqes =
339 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); 347 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
340 parms->act_nr_recv_wqes = 348 parms->rqueue.act_nr_wqes =
341 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); 349 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
342 parms->act_nr_send_sges = 350 parms->squeue.act_nr_sges =
343 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); 351 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
344 parms->act_nr_recv_sges = 352 parms->rqueue.act_nr_sges =
345 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); 353 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
346 parms->nr_sq_pages = 354 parms->squeue.queue_size =
347 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); 355 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
348 parms->nr_rq_pages = 356 parms->rqueue.queue_size =
349 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); 357 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
350 358
351 if (ret == H_SUCCESS) 359 if (ret == H_SUCCESS)
@@ -497,7 +505,7 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
497 const u64 count, 505 const u64 count,
498 const struct h_galpa galpa) 506 const struct h_galpa galpa)
499{ 507{
500 if (count != 1) { 508 if (count > 1) {
501 ehca_gen_err("Page counter=%lx", count); 509 ehca_gen_err("Page counter=%lx", count);
502 return H_PARAMETER; 510 return H_PARAMETER;
503 } 511 }
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 9606f13ed092..a090c679c397 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -40,6 +40,11 @@
40 40
41#include "ehca_tools.h" 41#include "ehca_tools.h"
42#include "ipz_pt_fn.h" 42#include "ipz_pt_fn.h"
43#include "ehca_classes.h"
44
45#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
46
47struct kmem_cache *small_qp_cache;
43 48
44void *ipz_qpageit_get_inc(struct ipz_queue *queue) 49void *ipz_qpageit_get_inc(struct ipz_queue *queue)
45{ 50{
@@ -49,7 +54,7 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue)
49 queue->current_q_offset -= queue->pagesize; 54 queue->current_q_offset -= queue->pagesize;
50 ret = NULL; 55 ret = NULL;
51 } 56 }
52 if (((u64)ret) % EHCA_PAGESIZE) { 57 if (((u64)ret) % queue->pagesize) {
53 ehca_gen_err("ERROR!! not at PAGE-Boundary"); 58 ehca_gen_err("ERROR!! not at PAGE-Boundary");
54 return NULL; 59 return NULL;
55 } 60 }
@@ -83,80 +88,195 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
83 return -EINVAL; 88 return -EINVAL;
84} 89}
85 90
86int ipz_queue_ctor(struct ipz_queue *queue, 91#if PAGE_SHIFT < EHCA_PAGESHIFT
87 const u32 nr_of_pages, 92#error Kernel pages must be at least as large than eHCA pages (4K) !
88 const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) 93#endif
94
95/*
96 * allocate pages for queue:
97 * outer loop allocates whole kernel pages (page aligned) and
98 * inner loop divides a kernel page into smaller hca queue pages
99 */
100static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
89{ 101{
90 int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; 102 int k, f = 0;
91 int f; 103 u8 *kpage;
92 104
93 if (pagesize > PAGE_SIZE) {
94 ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
95 "than kernel page size", pagesize);
96 return 0;
97 }
98 if (!pages_per_kpage) {
99 ehca_gen_err("FATAL ERROR: invalid kernel page size. "
100 "pages_per_kpage=%x", pages_per_kpage);
101 return 0;
102 }
103 queue->queue_length = nr_of_pages * pagesize;
104 queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
105 if (!queue->queue_pages) {
106 ehca_gen_err("ERROR!! didn't get the memory");
107 return 0;
108 }
109 memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
110 /*
111 * allocate pages for queue:
112 * outer loop allocates whole kernel pages (page aligned) and
113 * inner loop divides a kernel page into smaller hca queue pages
114 */
115 f = 0;
116 while (f < nr_of_pages) { 105 while (f < nr_of_pages) {
117 u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); 106 kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
118 int k;
119 if (!kpage) 107 if (!kpage)
120 goto ipz_queue_ctor_exit0; /*NOMEM*/ 108 goto out;
121 for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) { 109
122 (queue->queue_pages)[f] = (struct ipz_page *)kpage; 110 for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
111 queue->queue_pages[f] = (struct ipz_page *)kpage;
123 kpage += EHCA_PAGESIZE; 112 kpage += EHCA_PAGESIZE;
124 f++; 113 f++;
125 } 114 }
126 } 115 }
116 return 1;
127 117
128 queue->current_q_offset = 0; 118out:
119 for (f = 0; f < nr_of_pages && queue->queue_pages[f];
120 f += PAGES_PER_KPAGE)
121 free_page((unsigned long)(queue->queue_pages)[f]);
122 return 0;
123}
124
125static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
126{
127 int order = ilog2(queue->pagesize) - 9;
128 struct ipz_small_queue_page *page;
129 unsigned long bit;
130
131 mutex_lock(&pd->lock);
132
133 if (!list_empty(&pd->free[order]))
134 page = list_entry(pd->free[order].next,
135 struct ipz_small_queue_page, list);
136 else {
137 page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
138 if (!page)
139 goto out;
140
141 page->page = get_zeroed_page(GFP_KERNEL);
142 if (!page->page) {
143 kmem_cache_free(small_qp_cache, page);
144 goto out;
145 }
146
147 list_add(&page->list, &pd->free[order]);
148 }
149
150 bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
151 __set_bit(bit, page->bitmap);
152 page->fill++;
153
154 if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
155 list_move(&page->list, &pd->full[order]);
156
157 mutex_unlock(&pd->lock);
158
159 queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
160 queue->small_page = page;
161 return 1;
162
163out:
164 ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
165 return 0;
166}
167
168static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
169{
170 int order = ilog2(queue->pagesize) - 9;
171 struct ipz_small_queue_page *page = queue->small_page;
172 unsigned long bit;
173 int free_page = 0;
174
175 bit = ((unsigned long)queue->queue_pages[0] & PAGE_MASK)
176 >> (order + 9);
177
178 mutex_lock(&pd->lock);
179
180 __clear_bit(bit, page->bitmap);
181 page->fill--;
182
183 if (page->fill == 0) {
184 list_del(&page->list);
185 free_page = 1;
186 }
187
188 if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
189 /* the page was full until we freed the chunk */
190 list_move_tail(&page->list, &pd->free[order]);
191
192 mutex_unlock(&pd->lock);
193
194 if (free_page) {
195 free_page(page->page);
196 kmem_cache_free(small_qp_cache, page);
197 }
198}
199
200int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
201 const u32 nr_of_pages, const u32 pagesize,
202 const u32 qe_size, const u32 nr_of_sg,
203 int is_small)
204{
205 if (pagesize > PAGE_SIZE) {
206 ehca_gen_err("FATAL ERROR: pagesize=%x "
207 "is greater than kernel page size", pagesize);
208 return 0;
209 }
210
211 /* init queue fields */
212 queue->queue_length = nr_of_pages * pagesize;
213 queue->pagesize = pagesize;
129 queue->qe_size = qe_size; 214 queue->qe_size = qe_size;
130 queue->act_nr_of_sg = nr_of_sg; 215 queue->act_nr_of_sg = nr_of_sg;
131 queue->pagesize = pagesize; 216 queue->current_q_offset = 0;
132 queue->toggle_state = 1; 217 queue->toggle_state = 1;
133 return 1; 218 queue->small_page = NULL;
134 219
135 ipz_queue_ctor_exit0: 220 /* allocate queue page pointers */
136 ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x", 221 queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
137 queue, f, nr_of_pages); 222 if (!queue->queue_pages) {
138 for (f = 0; f < nr_of_pages; f += pages_per_kpage) { 223 ehca_gen_err("Couldn't allocate queue page list");
139 if (!(queue->queue_pages)[f]) 224 return 0;
140 break;
141 free_page((unsigned long)(queue->queue_pages)[f]);
142 } 225 }
226 memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
227
228 /* allocate actual queue pages */
229 if (is_small) {
230 if (!alloc_small_queue_page(queue, pd))
231 goto ipz_queue_ctor_exit0;
232 } else
233 if (!alloc_queue_pages(queue, nr_of_pages))
234 goto ipz_queue_ctor_exit0;
235
236 return 1;
237
238ipz_queue_ctor_exit0:
239 ehca_gen_err("Couldn't alloc pages queue=%p "
240 "nr_of_pages=%x", queue, nr_of_pages);
241 vfree(queue->queue_pages);
242
143 return 0; 243 return 0;
144} 244}
145 245
146int ipz_queue_dtor(struct ipz_queue *queue) 246int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
147{ 247{
148 int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; 248 int i, nr_pages;
149 int g;
150 int nr_pages;
151 249
152 if (!queue || !queue->queue_pages) { 250 if (!queue || !queue->queue_pages) {
153 ehca_gen_dbg("queue or queue_pages is NULL"); 251 ehca_gen_dbg("queue or queue_pages is NULL");
154 return 0; 252 return 0;
155 } 253 }
156 nr_pages = queue->queue_length / queue->pagesize; 254
157 for (g = 0; g < nr_pages; g += pages_per_kpage) 255 if (queue->small_page)
158 free_page((unsigned long)(queue->queue_pages)[g]); 256 free_small_queue_page(queue, pd);
257 else {
258 nr_pages = queue->queue_length / queue->pagesize;
259 for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
260 free_page((unsigned long)queue->queue_pages[i]);
261 }
262
159 vfree(queue->queue_pages); 263 vfree(queue->queue_pages);
160 264
161 return 1; 265 return 1;
162} 266}
267
268int ehca_init_small_qp_cache(void)
269{
270 small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
271 sizeof(struct ipz_small_queue_page),
272 0, SLAB_HWCACHE_ALIGN, NULL);
273 if (!small_qp_cache)
274 return -ENOMEM;
275
276 return 0;
277}
278
279void ehca_cleanup_small_qp_cache(void)
280{
281 kmem_cache_destroy(small_qp_cache);
282}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index 39a4f64aff41..c6937a044e8a 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -51,11 +51,25 @@
51#include "ehca_tools.h" 51#include "ehca_tools.h"
52#include "ehca_qes.h" 52#include "ehca_qes.h"
53 53
54struct ehca_pd;
55struct ipz_small_queue_page;
56
54/* struct generic ehca page */ 57/* struct generic ehca page */
55struct ipz_page { 58struct ipz_page {
56 u8 entries[EHCA_PAGESIZE]; 59 u8 entries[EHCA_PAGESIZE];
57}; 60};
58 61
62#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
63
64struct ipz_small_queue_page {
65 unsigned long page;
66 unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
67 int fill;
68 void *mapped_addr;
69 u32 mmap_count;
70 struct list_head list;
71};
72
59/* struct generic queue in linux kernel virtual memory (kv) */ 73/* struct generic queue in linux kernel virtual memory (kv) */
60struct ipz_queue { 74struct ipz_queue {
61 u64 current_q_offset; /* current queue entry */ 75 u64 current_q_offset; /* current queue entry */
@@ -66,7 +80,8 @@ struct ipz_queue {
66 u32 queue_length; /* queue length allocated in bytes */ 80 u32 queue_length; /* queue length allocated in bytes */
67 u32 pagesize; 81 u32 pagesize;
68 u32 toggle_state; /* toggle flag - per page */ 82 u32 toggle_state; /* toggle flag - per page */
69 u32 dummy3; /* 64 bit alignment */ 83 u32 offset; /* save offset within page for small_qp */
84 struct ipz_small_queue_page *small_page;
70}; 85};
71 86
72/* 87/*
@@ -188,9 +203,10 @@ struct ipz_qpt {
188 * see ipz_qpt_ctor() 203 * see ipz_qpt_ctor()
189 * returns true if ok, false if out of memory 204 * returns true if ok, false if out of memory
190 */ 205 */
191int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, 206int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
192 const u32 pagesize, const u32 qe_size, 207 const u32 nr_of_pages, const u32 pagesize,
193 const u32 nr_of_sg); 208 const u32 qe_size, const u32 nr_of_sg,
209 int is_small);
194 210
195/* 211/*
196 * destructor for a ipz_queue_t 212 * destructor for a ipz_queue_t
@@ -198,7 +214,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
198 * see ipz_queue_ctor() 214 * see ipz_queue_ctor()
199 * returns true if ok, false if queue was NULL-ptr or free failed 215 * returns true if ok, false if queue was NULL-ptr or free failed
200 */ 216 */
201int ipz_queue_dtor(struct ipz_queue *queue); 217int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
202 218
203/* 219/*
204 * constructor for a ipz_qpt_t, 220 * constructor for a ipz_qpt_t,