hw/mthca/kernel/mthca_provider.c
1 /* 
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2005 Cisco Systems. All rights reserved.
5  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  * $Id: mthca_provider.c 3047 2005-08-10 03:59:35Z roland $
37  */
38
39 #include <ib_smi.h>
40
41 #include "mx_abi.h"
42 #include "mthca_dev.h"
43
44 #if defined(EVENT_TRACING)
45 #ifdef offsetof
46 #undef offsetof
47 #endif
48 #include "mthca_provider.tmh"
49 #endif
50 #include "mthca_cmd.h"
51 #include "mthca_memfree.h"
52
53  void ibv_umem_release(struct ib_device *dev, struct ib_umem *umem);
54  int ibv_umem_get(struct ib_device *dev, struct ib_umem *mem,
55                  void *addr, size_t size, int write);
56  
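/*
 * All of the query verbs below follow the same pattern: build a SubnGet
 * SMP with init_query_mad(), set the attribute ID and modifier, issue it
 * to the local HCA through mthca_MAD_IFC(), and then pick the interesting
 * fields out of the attribute data returned in out_mad->data.
 */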
57  static void init_query_mad(struct ib_smp *mad)
58  {
59          mad->base_version  = 1;
60          mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
61          mad->class_version = 1;
62          mad->method        = IB_MGMT_METHOD_GET;
63  }
64
65  int mthca_query_device(struct ib_device *ibdev,
66                               struct ib_device_attr *props)
67 {
68         struct ib_smp *in_mad  = NULL;
69         struct ib_smp *out_mad = NULL;
70         int err = -ENOMEM;
71         struct mthca_dev* mdev = to_mdev(ibdev);
72
73         u8 status;
74
75         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
76         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
77         if (!in_mad || !out_mad)
78                 goto out;
79
80         init_query_mad(in_mad);
81         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
82
83         err = mthca_MAD_IFC(mdev, 1, 1,
84             1, NULL, NULL, in_mad, out_mad, &status);
85         if (err)
86                 goto out;
87         if (status) {
88                 err = -EINVAL;
89                 goto out;
90         }
91
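            /*
             * The fixed offsets below index fields of the IBA NodeInfo
             * attribute: SystemImageGUID at byte 4, DeviceID at 30,
             * Revision at 32, and the 24-bit VendorID in the low bytes
             * of the dword at byte 36 (hence the 0xffffff mask).
             */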
92         RtlZeroMemory(props, sizeof *props);
93         props->fw_ver              = mdev->fw_ver;
94         props->device_cap_flags    = mdev->device_cap_flags;
95         props->vendor_id           = cl_ntoh32(*(__be32 *) (out_mad->data + 36)) &
96                 0xffffff;
97         props->vendor_part_id      = cl_ntoh16(*(__be16 *) (out_mad->data + 30));
98         props->hw_ver              = cl_ntoh32(*(__be32 *) (out_mad->data + 32));
99         memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
100
101         props->max_mr_size         = ~0ull;
102         props->page_size_cap       = mdev->limits.page_size_cap;
103         props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
104         props->max_qp_wr           = mdev->limits.max_wqes;
105         props->max_sge             = mdev->limits.max_sg;
106         props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
107         props->max_cqe             = mdev->limits.max_cqes;
108         props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
109         props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
110         props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
111         props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
112         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
113         props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
114         props->max_srq_wr          = mdev->limits.max_srq_wqes;
115         props->max_srq_sge         = mdev->limits.max_sg;
116         props->local_ca_ack_delay  = (u8)mdev->limits.local_ca_ack_delay;
117         props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? 
118                                         IB_ATOMIC_LOCAL : IB_ATOMIC_NONE;
119         props->max_pkeys           = (u16)mdev->limits.pkey_table_len;
120         props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
121         props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
122         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 
123                                            props->max_mcast_grp;
124
125         err = 0;
126  out:
127         kfree(in_mad);
128         kfree(out_mad);
129         return err;
130 }
131
132 int mthca_query_port(struct ib_device *ibdev,
133                             u8 port, struct ib_port_attr *props)
134 {
135         struct ib_smp *in_mad  = NULL;
136         struct ib_smp *out_mad = NULL;
137         int err = -ENOMEM;
138         u8 status;
139
140         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
141         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
142         if (!in_mad || !out_mad)
143                 goto out;
144
145         init_query_mad(in_mad);
146         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
147         in_mad->attr_mod = cl_hton32(port);
148
149         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
150                             port, NULL, NULL, in_mad, out_mad,
151                             &status);
152         if (err)
153                 goto out;
154         if (status) {
155                 err = -EINVAL;
156                 goto out;
157         }
158
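            /*
             * Decode the PortInfo attribute; the byte offsets below follow
             * the IBA PortInfo layout (LID at 16, SM LID at 18, capability
             * mask at 20, and so on).
             */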
159         RtlZeroMemory(props, sizeof *props);
160         props->lid               = cl_ntoh16(*(__be16 *) (out_mad->data + 16));
161         props->lmc               = out_mad->data[34] & 0x7;
162         props->sm_lid            = cl_ntoh16(*(__be16 *) (out_mad->data + 18));
163         props->sm_sl             = out_mad->data[36] & 0xf;
164         props->state             = out_mad->data[32] & 0xf;
165         props->phys_state        = out_mad->data[33] >> 4;
166         props->port_cap_flags    = cl_ntoh32(*(__be32 *) (out_mad->data + 20));
167         props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
168         props->max_msg_sz        = 0x80000000;
169         props->pkey_tbl_len      = (u16)to_mdev(ibdev)->limits.pkey_table_len;
170         props->bad_pkey_cntr     = cl_ntoh16(*(__be16 *) (out_mad->data + 46));
171         props->qkey_viol_cntr    = cl_ntoh16(*(__be16 *) (out_mad->data + 48));
172         props->active_width      = out_mad->data[31] & 0xf;
173         props->active_speed      = out_mad->data[35] >> 4;
174         props->max_mtu           = out_mad->data[41] & 0xf;
175         props->active_mtu        = out_mad->data[36] >> 4;
176         props->subnet_timeout    = out_mad->data[51] & 0x1f;
177
178  out:
179         kfree(in_mad);
180         kfree(out_mad);
181         return err;
182 }
183
184 int mthca_modify_port(struct ib_device *ibdev,
185                              u8 port, int port_modify_mask,
186                              struct ib_port_modify *props)
187 {
188         struct mthca_set_ib_param set_ib;
189         struct ib_port_attr attr;
190         int err;
191         u8 status;
192
193         if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
194                 return -EFAULT;
195
196         err = mthca_query_port(ibdev, port, &attr);
197         if (err)
198                 goto out;
199
200         set_ib.set_si_guid     = 0;
201         set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
202
203         set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
204                 ~props->clr_port_cap_mask;
205
206         err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
207         if (err)
208                 goto out;
209         if (status) {
210                 err = -EINVAL;
211                 goto out;
212         }
213
214 out:
215         up(&to_mdev(ibdev)->cap_mask_mutex);
216         return err;
217 }
218
219 int mthca_query_pkey(struct ib_device *ibdev,
220                             u8 port, u16 index, u16 *pkey)
221 {
222         struct ib_smp *in_mad  = NULL;
223         struct ib_smp *out_mad = NULL;
224         int err = -ENOMEM;
225         u8 status;
226
227         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
228         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
229         if (!in_mad || !out_mad)
230                 goto out;
231
232         init_query_mad(in_mad);
233         in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
234         in_mad->attr_mod = cl_hton32(index / 32);
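            /* Each P_KeyTable attribute block holds 32 P_Keys, so the
             * attribute modifier selects block index/32 and the entry
             * index%32 is picked out of the reply below. */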
235
236         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
237                             port, NULL, NULL, in_mad, out_mad,
238                             &status);
239         if (err)
240                 goto out;
241         if (status) {
242                 err = -EINVAL;
243                 goto out;
244         }
245
246         *pkey = cl_ntoh16(((__be16 *) out_mad->data)[index % 32]);
247
248  out:
249         kfree(in_mad);
250         kfree(out_mad);
251         return err;
252 }
253
254 int mthca_query_gid(struct ib_device *ibdev, u8 port,
255                            int index, union ib_gid *gid)
256 {
257         struct ib_smp *in_mad  = NULL;
258         struct ib_smp *out_mad = NULL;
259         int err = -ENOMEM;
260         u8 status;
261
262         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
263         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
264         if (!in_mad || !out_mad)
265                 goto out;
266
267         init_query_mad(in_mad);
268         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
269         in_mad->attr_mod = cl_hton32(port);
270
271         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
272                             port, NULL, NULL, in_mad, out_mad,
273                             &status);
274         if (err)
275                 goto out;
276         if (status) {
277                 err = -EINVAL;
278                 goto out;
279         }
280
281         memcpy(gid->raw, out_mad->data + 8, 8);
282
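            /* The upper 8 bytes of the GID (the subnet prefix) come from
             * PortInfo above; the lower 8 bytes are the port GUID, read
             * from the GUIDInfo block covering this index (eight 8-byte
             * GUIDs per block). */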
283         init_query_mad(in_mad);
284         in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
285         in_mad->attr_mod = cl_hton32(index / 8);
286
287         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
288                             port, NULL, NULL, in_mad, out_mad,
289                             &status);
290         if (err)
291                 goto out;
292         if (status) {
293                 err = -EINVAL;
294                 goto out;
295         }
296
297         memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
298
299  out:
300         kfree(in_mad);
301         kfree(out_mad);
302         return err;
303 }
304
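/*
 * Allocate a user context: reserve a UAR for the process, map it into the
 * caller's address space (kernel mapping, MDL, then user mapping), allocate
 * the user doorbell table, and return the parameters the user-mode library
 * needs through p_umv_buf.  Failures unwind in reverse order below.
 */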
305 struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
306                                                 ci_umv_buf_t* const     p_umv_buf)
307 {
308         struct mthca_alloc_ucontext_resp uresp;
309         struct mthca_ucontext           *context;
310         int                              err;
311
312         RtlZeroMemory(&uresp, sizeof uresp);
313
314         uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
315         if (mthca_is_memfree(to_mdev(ibdev)))
316                 uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
317         else
318                 uresp.uarc_size = 0;
319
320         context = kzalloc(sizeof *context, GFP_KERNEL);
321         if (!context) {
322                 err = -ENOMEM;
323                 goto err_nomem;
324         }
325         
326         err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
327         if (err) 
328                 goto err_uar_alloc;
329
330         /*
331          * Map the UAR into user space: kernel mapping, MDL, user mapping.
332          */
333
334         /* map UAR to kernel */
335         context->kva = ioremap(context->uar.pfn << PAGE_SHIFT, PAGE_SIZE,&context->uar_size);
336         if (!context->kva) {
337                 HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW ,("Couldn't map kernel access region, aborting.\n") );
338                 err = -ENOMEM;
339                 goto err_ioremap;
340         }
341
342         /* build MDL */
343         context->mdl = IoAllocateMdl( context->kva, (ULONG)context->uar_size,
344                 FALSE, TRUE, NULL );
345         if( !context->mdl ) {
346                 err = -ENOMEM;
347                 goto err_alloc_mdl;
348         }
349         MmBuildMdlForNonPagedPool( context->mdl );
350
351         /* Map the memory into the calling process's address space. */
352         __try   {
353                 context->ibucontext.user_uar = MmMapLockedPagesSpecifyCache( context->mdl,
354                         UserMode, MmNonCached, NULL, FALSE, NormalPagePriority );
355         }
356         __except(EXCEPTION_EXECUTE_HANDLER) {
357                 err = -EACCES;
358                 goto err_map;
359         }
360
361         /* user_db_tab */
362         context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
363         if (IS_ERR(context->db_tab)) {
364                 err = PTR_ERR(context->db_tab);
365                 goto err_init_user;
366         }
367
368         err = ib_copy_to_umv_buf(p_umv_buf, &uresp, sizeof uresp);
369         if (err) 
370                 goto err_copy_to_umv_buf;
371
372         context->ibucontext.device = ibdev;
373         
374         atomic_set(&context->ibucontext.usecnt, 0);
375         return &context->ibucontext;
376
377 err_copy_to_umv_buf:
378         mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar,
379                 context->db_tab);
380 err_init_user:  
381         MmUnmapLockedPages( context->ibucontext.user_uar, context->mdl );
382 err_map:
383         IoFreeMdl(context->mdl);
384 err_alloc_mdl:  
385         iounmap(context->kva, PAGE_SIZE);
386 err_ioremap:    
387         mthca_uar_free(to_mdev(ibdev), &context->uar);
388 err_uar_alloc:
389         kfree(context);
390 err_nomem:      
391         return ERR_PTR(err);
392 }
393
394  int mthca_dealloc_ucontext(struct ib_ucontext *context)
395 {
396          struct mthca_ucontext                                   *mucontext = to_mucontext(context);
397
398         mthca_cleanup_user_db_tab(to_mdev(context->device), &mucontext->uar,
399                                   mucontext->db_tab);
400         MmUnmapLockedPages( mucontext->ibucontext.user_uar, mucontext->mdl );
401         IoFreeMdl(mucontext->mdl);
402         iounmap(mucontext->kva, PAGE_SIZE);
403         mthca_uar_free(to_mdev(context->device), &mucontext->uar);
404         kfree(mucontext);
405         
406         return 0;
407 }
408
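/*
 * Allocate a protection domain.  For a user-mode caller (p_umv_buf present)
 * the PD handle and PD number are copied back so the user-mode verbs
 * library can reference the PD directly.
 */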
409 struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
410                                     struct ib_ucontext *context,
411                                     ci_umv_buf_t* const                 p_umv_buf)
412 {
413         int err;
414         struct mthca_pd *pd;
415         struct ibv_alloc_pd_resp resp;
416
417         /* sanity check */
418         if (p_umv_buf && p_umv_buf->command) {
419                 if (p_umv_buf->output_size < sizeof(struct ibv_alloc_pd_resp)) {
420                         err = -EINVAL;
421                         goto err_param;
422                 }
423         }
424         
425         pd = kmalloc(sizeof *pd, GFP_KERNEL);
426         if (!pd) {
427                 err = -ENOMEM;
428                 goto err_mem;
429         }
430
431         err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
432         if (err) {
433                 goto err_pd_alloc;
434         }
435
436         if (p_umv_buf && p_umv_buf->command) {
437                 resp.pd_handle = (u64)(UINT_PTR)pd;
438                 resp.pdn = pd->pd_num;
439                 if (ib_copy_to_umv_buf(p_umv_buf, &resp, sizeof(struct ibv_alloc_pd_resp))) {
440                         err = -EFAULT;
441                         goto err_copy;
442                 }
443         }
444
445         return &pd->ibpd;
446
447 err_copy:       
448         mthca_pd_free(to_mdev(ibdev), pd);
449 err_pd_alloc:
450         kfree(pd);
451 err_mem:
452 err_param:
453         return ERR_PTR(err);
454 }
455
456 int mthca_dealloc_pd(struct ib_pd *pd)
457 {
458         mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
459         kfree(pd);
460
461         return 0;
462 }
463
464 struct ib_ah *mthca_ah_create(struct ib_pd *pd,
465                                      struct ib_ah_attr *ah_attr)
466 {
467         int err;
468         struct mthca_ah *ah;
469
470         ah = kzalloc(sizeof *ah, GFP_ATOMIC);
471         if (!ah)
472                 return ERR_PTR(-ENOMEM);
473
474         err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
475         if (err) {
476                 kfree(ah);
477                 return ERR_PTR(err);
478         }
479
480         return &ah->ibah;
481 }
482
483 int mthca_ah_destroy(struct ib_ah *ah)
484 {
485         mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
486         kfree(ah);
487
488         return 0;
489 }
490
491 struct ib_srq *mthca_create_srq(struct ib_pd *pd,
492                                        struct ib_srq_init_attr *init_attr,
493                                        ci_umv_buf_t* const                      p_umv_buf)
494 {
495 #ifdef WIN_TO_BE_CHANGED
496         struct mthca_create_srq ucmd;
497         struct mthca_ucontext *context = NULL;
498         struct mthca_srq *srq;
499         int err;
500
501         srq = kmalloc(sizeof *srq, GFP_KERNEL);
502         if (!srq)
503                 return ERR_PTR(-ENOMEM);
504
505         if (pd->ucontext) {
506                 context = to_mucontext(pd->ucontext);
507
508                 if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) {
509                         err = -EFAULT;
510                         goto err_free;
511                 }
512                 err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
513                                         context->db_tab, ucmd.db_index,
514                                         ucmd.db_page);
515
516                 if (err)
517                         goto err_free;
518
519                 srq->mr.ibmr.lkey = ucmd.lkey;
520                 srq->db_index     = ucmd.db_index;
521         }
522
523         err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
524                               &init_attr->attr, srq);
525
526         if (err && pd->ucontext)
527                 mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
528                                     context->db_tab, ucmd.db_index);
529
530         if (err)
531                 goto err_free;
532
533         if (context && ib_copy_to_umv_buf(p_umv_buf, &srq->srqn, sizeof (u32))) {
534                 mthca_free_srq(to_mdev(pd->device), srq);
535                 err = -EFAULT;
536                 goto err_free;
537         }
538
539         return &srq->ibsrq;
540
541 err_free:
542         kfree(srq);
543
544         return ERR_PTR(err);
545 #else
546         UNREFERENCED_PARAMETER(p_umv_buf);
547         UNREFERENCED_PARAMETER(init_attr);
548         UNREFERENCED_PARAMETER(pd);
549         return NULL;
550 #endif
551 }
552
553 int mthca_destroy_srq(struct ib_srq *srq)
554 {
555         struct mthca_ucontext *context;
556
557         if (srq->uobject) {
558                 context = to_mucontext(srq->uobject->context);
559
560                 mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
561                                     context->db_tab, to_msrq(srq)->db_index);
562         }
563
564         mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
565         kfree(srq);
566
567         return 0;
568 }
569
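/*
 * Create a QP.  For a user QP the send/receive doorbell pages supplied
 * through p_umv_buf are mapped with mthca_map_user_db() before the QP is
 * allocated; special QPs (QP0/QP1) may only be created from kernel space,
 * and raw QP types are not supported.
 */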
570 struct ib_qp *mthca_create_qp(struct ib_pd *pd,
571                                      struct ib_qp_init_attr *init_attr,
572                                       ci_umv_buf_t* const                       p_umv_buf)
573 {
574         struct ibv_create_qp ucmd = {0};
575         struct mthca_qp *qp = NULL;
576         struct mthca_ucontext *context = NULL;
577         int err;
578
579         switch (init_attr->qp_type) {
580         case IB_QPT_RELIABLE_CONN:
581         case IB_QPT_UNRELIABLE_CONN:
582         case IB_QPT_UNRELIABLE_DGRM:
583         {
584
585                 qp = kmalloc(sizeof *qp, GFP_KERNEL);
586                 if (!qp) {
587                         err = -ENOMEM;
588                         goto err_mem;
589                 }
590
591                 if (pd->ucontext) {
592                         context = to_mucontext(pd->ucontext);
593
594                         if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) {
595                                 err = -EFAULT;
596                                 goto err_copy;
597                         }
598
599                         err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
600                                                 context->db_tab,
601                                                 ucmd.sq_db_index, ucmd.sq_db_page);
602                         if (err) 
603                                 goto err_map1;
604
605                         err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
606                                                 context->db_tab,
607                                                 ucmd.rq_db_index, ucmd.rq_db_page);
608                         if (err) 
609                                 goto err_map2;
610
611                         qp->mr.ibmr.lkey = ucmd.lkey;
612                         qp->sq.db_index  = ucmd.sq_db_index;
613                         qp->rq.db_index  = ucmd.rq_db_index;
614                 }
615
616                 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
617                                      to_mcq(init_attr->send_cq),
618                                      to_mcq(init_attr->recv_cq),
619                                      init_attr->qp_type, init_attr->sq_sig_type,
620                                      &init_attr->cap, qp);
621
622                 if (err) {
623                         if (pd->ucontext)
624                                 goto err_alloc_qp_user;
625                         goto err_copy;
626                 }
627
628                 qp->ibqp.qp_num = qp->qpn;
629                 break;
630         }
631         case IB_QPT_QP0:
632         case IB_QPT_QP1:
633         {
634                 /* Don't allow userspace to create special QPs */
635                 if (pd->ucontext) {
636                         err = -EINVAL;
637                         goto err_inval;
638                 }
639
640                 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
641                 if (!qp) {
642                         err = -ENOMEM;
643                         goto err_mem;
644                 }
645
646                 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_QP0 ? 0 : 1;
647
648                 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
649                                       to_mcq(init_attr->send_cq),
650                                       to_mcq(init_attr->recv_cq),
651                                       init_attr->sq_sig_type, &init_attr->cap,
652                                       qp->ibqp.qp_num, init_attr->port_num,
653                                       to_msqp(qp));
654                 if (err)
655                         goto err_alloc_sqp;
656                 
657                 break;
658         }
659         default:
660                 /* Don't support raw QPs */
661                 err = -ENOSYS;
662                 goto err_unsupported;
663         }
664
665         init_attr->cap.max_send_wr     = qp->sq.max;
666         init_attr->cap.max_recv_wr     = qp->rq.max;
667         init_attr->cap.max_send_sge    = qp->sq.max_gs;
668         init_attr->cap.max_recv_sge    = qp->rq.max_gs;
669         init_attr->cap.max_inline_data    = qp->max_inline_data;
670
671         return &qp->ibqp;
672
673                 
674 err_alloc_qp_user:
675         if (pd->ucontext) 
676                 mthca_unmap_user_db(to_mdev(pd->device),
677                         &context->uar, context->db_tab, ucmd.rq_db_index);
678 err_map2:
679         if (pd->ucontext) 
680                 mthca_unmap_user_db(to_mdev(pd->device),
681                         &context->uar, context->db_tab, ucmd.sq_db_index);
682 err_map1: err_copy: err_alloc_sqp:
683         if (qp)
684                 kfree(qp);
685 err_mem: err_inval:     err_unsupported:
686         return ERR_PTR(err);
687 }
688
689 int mthca_destroy_qp(struct ib_qp *qp)
690 {
691         if (qp->ucontext) {
692                 mthca_unmap_user_db(to_mdev(qp->device),
693                                     &to_mucontext(qp->ucontext)->uar,
694                                     to_mucontext(qp->ucontext)->db_tab,
695                                     to_mqp(qp)->sq.db_index);
696                 mthca_unmap_user_db(to_mdev(qp->device),
697                                     &to_mucontext(qp->ucontext)->uar,
698                                     to_mucontext(qp->ucontext)->db_tab,
699                                     to_mqp(qp)->rq.db_index);
700         }
701         mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
702         kfree(qp);
703         return 0;
704 }
705
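/*
 * Create a CQ.  For a user CQ the consumer-index and arm doorbell pages
 * supplied through p_umv_buf are mapped first, and the CQ number is
 * returned through the caller's inout buffer on success.
 */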
706 struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
707                                      struct ib_ucontext *context,
708                                      ci_umv_buf_t* const                        p_umv_buf)
709 {
710         struct ibv_create_cq ucmd = {0};
711         struct mthca_cq *cq;
712         int nent;
713         int err;
714
715         if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)   
716                 return ERR_PTR(-EINVAL);
717
718         if (context) {
719                 if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd))
720                         return ERR_PTR(-EFAULT);
721
722                 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
723                                         to_mucontext(context)->db_tab,
724                                         ucmd.set_db_index, ucmd.set_db_page);
725                 if (err)
726                         return ERR_PTR(err);
727
728                 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
729                                         to_mucontext(context)->db_tab,
730                                         ucmd.arm_db_index, ucmd.arm_db_page);
731                 if (err)
732                         goto err_unmap_set;
733         }
734
735         cq = kmalloc(sizeof *cq, GFP_KERNEL);
736         if (!cq) {
737                 err = -ENOMEM;
738                 goto err_unmap_arm;
739         }
740
741         if (context) {
742                 cq->mr.ibmr.lkey = ucmd.lkey;
743                 cq->set_ci_db_index = ucmd.set_db_index;
744                 cq->arm_db_index    = ucmd.arm_db_index;
745         }
746
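            /* The CQ size programmed into the HCA is a power of two, so
             * round the requested entry count up to the next power of two
             * strictly greater than 'entries'. */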
747         for (nent = 1; nent <= entries; nent <<= 1)
748                 ; /* nothing */
749
750         err = mthca_init_cq(to_mdev(ibdev), nent, 
751                             context ? to_mucontext(context) : NULL,
752                             context ? ucmd.mr.pdn : to_mdev(ibdev)->driver_pd.pd_num,
753                             cq);
754         if (err)
755                 goto err_free;
756
757         if (context) {
758                 struct ibv_create_cq_resp *create_cq_resp = (struct ibv_create_cq_resp *)(void*)p_umv_buf->p_inout_buf;
759                 create_cq_resp->cqn = cq->cqn;
760         }
761
762         HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_LOW ,
763                 ("uctx %p, cq_hndl %p, cq_num %#x, cqe  %#x\n",
764                 context, &cq->ibcq, cq->cqn, cq->ibcq.cqe ) );
765         
766         return &cq->ibcq;
767
768 err_free:
769         kfree(cq);
770
771 err_unmap_arm:
772         if (context)
773                 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
774                                     to_mucontext(context)->db_tab, ucmd.arm_db_index);
775
776 err_unmap_set:
777         if (context)
778                 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
779                                     to_mucontext(context)->db_tab, ucmd.set_db_index);
780
781         return ERR_PTR(err);
782 }
783
784 int mthca_destroy_cq(struct ib_cq *cq)
785 {
786         if (cq->ucontext) {
787                 mthca_unmap_user_db(to_mdev(cq->device),
788                                     &to_mucontext(cq->ucontext)->uar,
789                                     to_mucontext(cq->ucontext)->db_tab,
790                                     to_mcq(cq)->arm_db_index);
791                 mthca_unmap_user_db(to_mdev(cq->device),
792                                     &to_mucontext(cq->ucontext)->uar,
793                                     to_mucontext(cq->ucontext)->db_tab,
794                                     to_mcq(cq)->set_ci_db_index);
795         }
796         mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
797         kfree(cq);
798
799         return 0;
800 }
801
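/*
 * Translate the QP access flags used by the verbs interface into MPT
 * access flags for the HCA; local read access is always granted.
 */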
802 static
803 mthca_mpt_access_t
804 map_qp_mpt(
805         IN                              mthca_qp_access_t                               qp_acl)
806 {
807 #define ACL_MTHCA(mfl,ifl) if (qp_acl & mfl)   mpt_acl |= ifl
808         mthca_mpt_access_t mpt_acl = 0;
809
810         ACL_MTHCA(MTHCA_ACCESS_REMOTE_READ,MTHCA_MPT_FLAG_REMOTE_READ);
811         ACL_MTHCA(MTHCA_ACCESS_REMOTE_WRITE,MTHCA_MPT_FLAG_REMOTE_WRITE);
812         ACL_MTHCA(MTHCA_ACCESS_REMOTE_ATOMIC,MTHCA_MPT_FLAG_ATOMIC);
813         ACL_MTHCA(MTHCA_ACCESS_LOCAL_WRITE,MTHCA_MPT_FLAG_LOCAL_WRITE);
814
815         return (mpt_acl | MTHCA_MPT_FLAG_LOCAL_READ);
816 }
817
818 struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc)
819 {
820         struct mthca_mr *mr;
821         int err;
822
823         mr = kmalloc(sizeof *mr, GFP_KERNEL);
824         if (!mr)
825                 return ERR_PTR(-ENOMEM);
826         RtlZeroMemory(mr, sizeof *mr);
827
828         err = mthca_mr_alloc_notrans(to_mdev(pd->device),
829                                      to_mpd(pd)->pd_num,
830                                      map_qp_mpt(acc), mr);
831
832         if (err) {
833                 kfree(mr);
834                 return ERR_PTR(err);
835         }
836
837         return &mr->ibmr;
838 }
839
840 struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
841                                        struct ib_phys_buf *buffer_list,
842                                        int                 num_phys_buf,
843                                        mthca_qp_access_t                 acc,
844                                        u64                *iova_start)
845 {
846         struct mthca_mr *mr;
847         u64 *page_list;
848         u64 total_size;
849         u64 mask;
850         int shift;
851         int npages;
852         int err;
853         int i, j, n;
854
855         /* First check that we have enough alignment */
856         if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
857                 return ERR_PTR(-EINVAL);
858
859         if (num_phys_buf > 1 &&
860             ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
861                 return ERR_PTR(-EINVAL);
862
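            /* Accumulate every intermediate buffer boundary into 'mask';
             * any bit set below a candidate page shift means that page
             * size cannot describe the region as one contiguous range. */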
863         mask = 0;
864         total_size = 0;
865         for (i = 0; i < num_phys_buf; ++i) {
866                 if (i != 0)
867                         mask |= buffer_list[i].addr;
868                 if (i != num_phys_buf - 1)
869                         mask |= buffer_list[i].addr + buffer_list[i].size;
870
871                 total_size += buffer_list[i].size;
872         }
873
874         if (mask & ~PAGE_MASK)
875                 return ERR_PTR(-EINVAL);
876
877         /* Find largest page shift we can use to cover buffers */
878         for (shift = PAGE_SHIFT; shift < 31; ++shift)
879                 if (num_phys_buf > 1) {
880                         if ((1ULL << shift) & mask)
881                                 break;
882                 } else {
883                         if (1ULL << shift >=
884                             buffer_list[0].size +
885                             (buffer_list[0].addr & ((1ULL << shift) - 1)))
886                                 break;
887                 }
888
889         buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
890         buffer_list[0].addr &= ~0ull << shift;
891
892         mr = kmalloc(sizeof *mr, GFP_KERNEL);
893         if (!mr)
894                 return ERR_PTR(-ENOMEM);
895         RtlZeroMemory(mr, sizeof *mr);
896
897         npages = 0;
898         for (i = 0; i < num_phys_buf; ++i)
899                 npages += (int)((buffer_list[i].size + (1ULL << shift) - 1) >> shift);
900
901         if (!npages)
902                 return &mr->ibmr;
903
904         page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
905         if (!page_list) {
906                 kfree(mr);
907                 return ERR_PTR(-ENOMEM);
908         }
909
910         n = 0;
911         for (i = 0; i < num_phys_buf; ++i)
912                 for (j = 0;
913                      j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
914                      ++j)
915                         page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
916
917         HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Registering memory at %I64x (iova %I64x) "
918                   "in PD %x; shift %d, npages %d.\n",
919                   (unsigned long long) buffer_list[0].addr,
920                   (unsigned long long) *iova_start,
921                   to_mpd(pd)->pd_num,
922                   shift, npages));
923
924         err = mthca_mr_alloc_phys(to_mdev(pd->device),
925                                   to_mpd(pd)->pd_num,
926                                   page_list, shift, npages,
927                                   *iova_start, total_size,
928                                   map_qp_mpt(acc), mr);
929
930         if (err) {
931                 kfree(page_list);
932                 kfree(mr);
933                 return ERR_PTR(err);
934         }
935
936         kfree(page_list);
937         return &mr->ibmr;
938 }
939
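/*
 * Register a user memory region: pin the buffer with ibv_umem_get(),
 * allocate an MTT large enough for the pinned pages, write the page
 * addresses into it in mailbox-sized batches with mthca_write_mtt(),
 * and finally allocate the MPT with mthca_mr_alloc().
 */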
940 struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, 
941         void* __ptr64   vaddr, uint64_t length, uint64_t hca_va, mthca_qp_access_t acc)
942 {
943         struct mthca_dev *dev = to_mdev(pd->device);
944         struct ib_umem_chunk *chunk;
945         struct mthca_mr *mr;
946         struct ib_umem *region;
947         u64 *pages;
948         int shift, n, len;
949         int i, j, k;
950         int err = 0;
951
952         HCA_ENTER(HCA_DBG_MEMORY);
953         mr = kzalloc(sizeof *mr, GFP_KERNEL);
954         if (!mr) {
955                 err = -ENOMEM;
956                 goto err_nomem;
957         }
958         region = &mr->umem;
959
960         /*
961          * We ask for writable memory if any access flags other than
962          * "remote read" are set.  "Local write" and "remote write"
963          * obviously require write access.  "Remote atomic" can do
964          * things like fetch and add, which will modify memory, and
965          * "MW bind" can change permissions by binding a window.
966          */
967         err = ibv_umem_get(pd->device, region,
968                           (void *)vaddr, (size_t)length,
969                           !!(acc & ~MTHCA_ACCESS_REMOTE_READ));
970         if (err)
971                 goto err_umem_get;
972
973         region->virt_base = hca_va;     /* va in HCA */
974
975         n = 0;
976         shift = ffs(region->page_size) - 1;
977         list_for_each_entry(chunk, &region->chunk_list, list,struct ib_umem_chunk)
978                 n += chunk->nents;
979
980         mr->mtt = mthca_alloc_mtt(dev, n);
981         if (IS_ERR(mr->mtt)) {
982                 err = PTR_ERR(mr->mtt);
983                 goto err_alloc_mtt;
984         }
985
986         pages = (u64 *) kmalloc(PAGE_SIZE,GFP_KERNEL);
987         if (!pages) {
988                 err = -ENOMEM;
989                 goto err_pages;
990         }
991
992         i = n = 0;
993
994         list_for_each_entry(chunk, &region->chunk_list, list,struct ib_umem_chunk)
995                 for (j = 0; j < chunk->nmap; ++j) {
996                         len = sg_dma_len(&chunk->page_list[j]) >> shift;
997                         for (k = 0; k < len; ++k) {
998                                 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
999                                         region->page_size * k;
1000                                 /*
1001                                  * Be friendly to WRITE_MTT command
1002                                  * and leave two empty slots for the
1003                                  * index and reserved fields of the
1004                                  * mailbox.
1005                                  */
1006                                 if (i == PAGE_SIZE / sizeof (u64) - 2) {
1007                                         err = mthca_write_mtt(dev, mr->mtt,
1008                                                               n, pages, i);
1009                                         if (err)
1010                                                 goto err_write_mtt;
1011                                         n += i;
1012                                         i = 0;
1013                                 }
1014                         }
1015                 }
1016
1017         if (i) {
1018                 err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
1019                 if (err)
1020                         goto err_write_mtt;
1021         }       
1022
1023         err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
1024                              region->length, map_qp_mpt(acc), mr);
1025         if (err)
1026                 goto err_mt_alloc;
1027
1028         free_page((void*) pages);
1029         HCA_EXIT(HCA_DBG_MEMORY);
1030         return &mr->ibmr;
1031
1032 err_mt_alloc:
1033 err_write_mtt:
1034         free_page((void*) pages);
1035 err_pages:
1036         mthca_free_mtt(dev, mr->mtt);
1037 err_alloc_mtt:
1038         ibv_umem_release(pd->device, region);
1039 err_umem_get:   
1040         kfree(mr);
1041 err_nomem:      
1042         HCA_EXIT(HCA_DBG_MEMORY);
1043         return ERR_PTR(err);
1044 }
1045
1046 int mthca_dereg_mr(struct ib_mr *mr)
1047 {
1048         struct mthca_mr *mmr = to_mmr(mr);
1049         mthca_free_mr(to_mdev(mr->device), mmr);
1050         if (mr->pd->ucontext)
1051                 ibv_umem_release(mr->pd->device, &mmr->umem);
1052         kfree(mmr);
1053         return 0;
1054 }
1055
1056 struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, mthca_qp_access_t acc,
1057                                       struct ib_fmr_attr *fmr_attr)
1058 {
1059         struct mthca_fmr *fmr;
1060         int err;
1061
1062         fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
1063         if (!fmr)
1064                 return ERR_PTR(-ENOMEM);
1065
1066         memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
1067         err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
1068                              map_qp_mpt(acc), fmr);
1069
1070         if (err) {
1071                 kfree(fmr);
1072                 return ERR_PTR(err);
1073         }
1074
1075         return &fmr->ibmr;
1076 }
1077
1078 int mthca_dealloc_fmr(struct ib_fmr *fmr)
1079 {
1080         struct mthca_fmr *mfmr = to_mfmr(fmr);
1081         int err;
1082
1083         err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
1084         if (err)
1085                 return err;
1086
1087         kfree(mfmr);
1088         return 0;
1089 }
1090
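/*
 * Unmap a list of FMRs.  All entries must belong to the same device;
 * after the per-FMR unmaps (Arbel or Tavor flavour) a single SYNC_TPT
 * command flushes the translation tables.
 */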
1091 int mthca_unmap_fmr(struct list_head *fmr_list)
1092 {
1093         struct ib_fmr *fmr;
1094         int err;
1095         u8 status;
1096         struct mthca_dev *mdev = NULL;
1097
1098         list_for_each_entry(fmr, fmr_list, list,struct ib_fmr) {
1099                 if (mdev && to_mdev(fmr->device) != mdev)
1100                         return -EINVAL;
1101                 mdev = to_mdev(fmr->device);
1102         }
1103
1104         if (!mdev)
1105                 return 0;
1106
1107         if (mthca_is_memfree(mdev)) {
1108                 list_for_each_entry(fmr, fmr_list, list,struct ib_fmr)
1109                         mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
1110
1111                 wmb();
1112         } else
1113                 list_for_each_entry(fmr, fmr_list, list,struct ib_fmr)
1114                         mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
1115
1116         err = mthca_SYNC_TPT(mdev, &status);
1117         if (err)
1118                 return err;
1119         if (status)
1120                 return -EINVAL;
1121         return 0;
1122 }
1123
1124 static int mthca_init_node_data(struct mthca_dev *dev)
1125 {
1126         struct ib_smp *in_mad  = NULL;
1127         struct ib_smp *out_mad = NULL;
1128         int err = -ENOMEM;
1129         u8 status;
1130
1131         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
1132         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1133         if (!in_mad || !out_mad)
1134                 goto out;
1135
1136         init_query_mad(in_mad);
1137         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
1138
1139         err = mthca_MAD_IFC(dev, 1, 1,
1140                             1, NULL, NULL, in_mad, out_mad,
1141                             &status);
1142         if (err)
1143                 goto out;
1144         if (status) {
1145                 err = -EINVAL;
1146                 goto out;
1147         }
1148
1149         memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
1150
1151 out:
1152         kfree(in_mad);
1153         kfree(out_mad);
1154         return err;
1155 }
1156
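/*
 * Fill in the ib_device verb table and register with the IB core.  SRQ and
 * FMR verbs are only wired up when the HCA supports them, the datapath
 * verbs are chosen per HCA family (Arbel mem-free vs. Tavor), and the
 * catastrophic-error poller is started once registration succeeds.
 */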
1157 int mthca_register_device(struct mthca_dev *dev)
1158 {
1159         int ret;
1160
1161         ret = mthca_init_node_data(dev);        
1162         if (ret)
1163                 return ret;
1164
1165         strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
1166         dev->ib_dev.node_type            = IB_NODE_CA;
1167         dev->ib_dev.phys_port_cnt        = (u8)dev->limits.num_ports;
1168         dev->ib_dev.mdev                 = dev;
1169         dev->ib_dev.query_device         = mthca_query_device;
1170         dev->ib_dev.query_port           = mthca_query_port;
1171         dev->ib_dev.modify_port          = mthca_modify_port;
1172         dev->ib_dev.query_pkey           = mthca_query_pkey;
1173         dev->ib_dev.query_gid            = mthca_query_gid;
1174         dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
1175         dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
1176         dev->ib_dev.alloc_pd             = mthca_alloc_pd;
1177         dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
1178         dev->ib_dev.create_ah            = mthca_ah_create;
1179         dev->ib_dev.destroy_ah           = mthca_ah_destroy;
1180
1181         if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
1182                 dev->ib_dev.create_srq           = mthca_create_srq;
1183                 dev->ib_dev.modify_srq           = mthca_modify_srq;
1184                 dev->ib_dev.destroy_srq          = mthca_destroy_srq;
1185
1186                 if (mthca_is_memfree(dev))
1187                         dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
1188                 else
1189                         dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
1190         }
1191
1192         dev->ib_dev.create_qp            = mthca_create_qp;
1193         dev->ib_dev.modify_qp            = mthca_modify_qp;
1194         dev->ib_dev.destroy_qp           = mthca_destroy_qp;
1195         dev->ib_dev.create_cq            = mthca_create_cq;
1196         dev->ib_dev.destroy_cq           = mthca_destroy_cq;
1197         dev->ib_dev.poll_cq              = mthca_poll_cq;
1198         dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
1199         dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
1200         dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
1201         dev->ib_dev.dereg_mr             = mthca_dereg_mr;
1202
1203         if (dev->mthca_flags & MTHCA_FLAG_FMR) {
1204                 dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
1205                 dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
1206                 dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
1207                 if (mthca_is_memfree(dev))
1208                         dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
1209                 else
1210                         dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
1211         }
1212
1213         dev->ib_dev.attach_mcast         = mthca_multicast_attach;
1214         dev->ib_dev.detach_mcast         = mthca_multicast_detach;
1215         dev->ib_dev.process_mad          = mthca_process_mad;
1216
1217         if (mthca_is_memfree(dev)) {
1218                 dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
1219                 dev->ib_dev.post_send     = mthca_arbel_post_send;
1220                 dev->ib_dev.post_recv     = mthca_arbel_post_receive;
1221         } else {
1222                 dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
1223                 dev->ib_dev.post_send     = mthca_tavor_post_send;
1224                 dev->ib_dev.post_recv     = mthca_tavor_post_receive;
1225         }
1226
1227         KeInitializeMutex(&dev->cap_mask_mutex, 0);
1228
1229         ret = ib_register_device(&dev->ib_dev);
1230         if (ret)
1231                 return ret;
1232
1233         mthca_start_catas_poll(dev);
1234
1235         return 0;
1236 }
1237
1238 void mthca_unregister_device(struct mthca_dev *dev)
1239 {
1240         mthca_stop_catas_poll(dev);
1241         ib_unregister_device(&dev->ib_dev);
1242 }