[ALL] Enable warning C4826.
author leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Sun, 13 Jul 2008 11:31:46 +0000 (11:31 +0000)
committer leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Sun, 13 Jul 2008 11:31:46 +0000 (11:31 +0000)
Warning C4826 is issued when a 32-bit pointer is sign-extended to a 64-bit value.  This patch turns the warning on by default (for any users of complib) and fixes all instances of problematic code.  It also eradicates __ptr64 usage from the QLogic VNIC driver.
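
As an illustration of the fix pattern used throughout the patch, here is a minimal user-mode sketch (the standalone program and its variable names are illustrative only, not taken from the affected drivers): casting a pointer straight to uint64_t is sign-extended on a 32-bit build and trips C4826 once the warning is enabled, whereas converting through the unsigned, pointer-sized ULONG_PTR first widens with zero extension.

    #include <windows.h>    /* ULONG_PTR */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int x = 0;
        void *p = &x;

        /* On a 32-bit build this conversion sign-extends the pointer value
           and raises C4826 once the warning is enabled. */
        uint64_t sign_extended = (uint64_t)p;

        /* Converting through the unsigned, pointer-sized ULONG_PTR first
           zero-extends instead; this is the cast pattern applied below. */
        uint64_t zero_extended = (uint64_t)(ULONG_PTR)p;

        printf("%#llx %#llx\n",
            (unsigned long long)sign_extended,
            (unsigned long long)zero_extended);
        return 0;
    }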

Code affected:
- IBAL
- MTHCA
- MLX4
- QLGCVNIC

Note that I didn't change DAPL because it's licensed under the CPL, not BSD, so someone else will have to fix that.  In fact, DAPL should probably just go back into the SourceForge project rather than being duplicated everywhere.  Code in the WinOF SVN should be BSD-only according to the contributor's agreement.

Signed-off-by: Fab Tillier <ftillier@microsoft.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1392 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

15 files changed:
core/al/kernel/al_ndi_cm.c
hw/mlx4/kernel/bus/core/iobuf.c
hw/mlx4/kernel/hca/mr.c
hw/mthca/kernel/hca_memory.c
hw/mthca/kernel/mt_memory.c
hw/mthca/kernel/mthca_provider.c
hw/mthca/user/mlnx_uvp_qp.c
inc/kernel/complib/cl_types_osd.h
inc/user/complib/cl_types_osd.h
ulp/qlgcvnic/kernel/vnic_adapter.c
ulp/qlgcvnic/kernel/vnic_control.c
ulp/qlgcvnic/kernel/vnic_data.c
ulp/qlgcvnic/kernel/vnic_ib.c
ulp/qlgcvnic/kernel/vnic_ib.h
ulp/qlgcvnic/kernel/vnic_util.h

diff --git a/core/al/kernel/al_ndi_cm.c b/core/al/kernel/al_ndi_cm.c
index 6f9d7a7..66994e5 100644
@@ -492,7 +492,7 @@ ndi_qp_init(
 \r
 AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,\r
        ("Creating h_qp %#I64x, uhdl %#I64x \n", \r
-       (uint64_t)h_qp, h_qp->obj.hdl ) );\r
+       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl ) );\r
 \r
 exit:\r
        AL_EXIT( AL_DBG_NDI );\r
@@ -513,7 +513,7 @@ ndi_qp_destroy(
        {\r
                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,\r
                        ("Destroying h_qp %#I64x, uhdl %#I64x, cid %d\n", \r
-                       (uint64_t)h_qp, h_qp->obj.hdl, ((al_conn_qp_t*)h_qp)->cid ) );\r
+                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, ((al_conn_qp_t*)h_qp)->cid ) );\r
 \r
                /* Move the state before flushing, so that all new IRPs fail to queue. */\r
                __ndi_acquire_lock( &h_qp->p_irp_queue->csq, &irql );\r
@@ -526,7 +526,7 @@ ndi_qp_destroy(
                {\r
                        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, \r
                                ("h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n", \r
-                               (uint64_t)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
+                               (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
 \r
                        __ndi_complete_cancelled_irp( &h_qp->p_irp_queue->csq, Irp );\r
                }\r
@@ -535,13 +535,13 @@ ndi_qp_destroy(
                {\r
                        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, \r
                                ("h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n", \r
-                               (uint64_t)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
+                               (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
 \r
                        __ndi_complete_cancelled_irp( &h_qp->p_irp_queue->csq, Irp );\r
                }\r
                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, \r
                        ("h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n", \r
-                       (uint64_t)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
+                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
        }\r
 \r
        AL_EXIT( AL_DBG_NDI );\r
@@ -652,7 +652,7 @@ __ndi_proc_rej(
 \r
        AL_PRINT(TRACE_LEVEL_ERROR, AL_DBG_ERROR, \r
                ("p_rej %p, h_qp %#I64x, uhdl %#I64x, connect reject, reason=%hd\n", \r
-               p_rej, (uint64_t)h_qp, h_qp->obj.hdl, cl_ntoh16(p_rej->reason) ) );\r
+               p_rej, (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, cl_ntoh16(p_rej->reason) ) );\r
 \r
        p_irp = IoCsqRemoveNextIrp( &h_qp->p_irp_queue->csq, NULL );\r
        __ndi_notify_dreq( h_qp );\r
@@ -1185,7 +1185,7 @@ __ndi_pr_query(
        {\r
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, \r
                        ("STATUS_CONNECTION_ACTIVE: h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",\r
-                       (uint64_t)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
+                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
                return STATUS_CONNECTION_ACTIVE;\r
        }\r
 \r
@@ -1596,7 +1596,7 @@ __ndi_send_dreq(
        {\r
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, \r
                        ("STATUS_CONNECTION_ACTIVE: h_qp %#I64x, uhdl %#I64x, ref_cnt %d\n",\r
-                       (uint64_t)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
+                       (uint64_t)(ULONG_PTR)h_qp, h_qp->obj.hdl, h_qp->obj.ref_cnt ) );\r
                return STATUS_CONNECTION_INVALID;\r
        }\r
 \r
diff --git a/hw/mlx4/kernel/bus/core/iobuf.c b/hw/mlx4/kernel/bus/core/iobuf.c
index 4e6dbe2..62ce6af 100644
-/*
- * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: mt_memory.c 2020 2007-05-01 09:29:10Z leonid $
- */
-#include <mlx4_debug.h>
-#include "l2w.h"
-#include "pa_cash.h"
-#include "ib_verbs.h"
-
-#if defined (EVENT_TRACING)
-#ifdef offsetof
-#undef offsetof
-#endif
-#include "iobuf.tmh"
-#endif 
-
-
-
-
-/*
-*      Function: map user buffer to kernel and lock it
-*
-*      Return: 
-*/
-int get_user_pages(
-       IN              struct mlx4_dev *dev,   /* device */
-       IN              u64 start,                                                      /* address in user space */
-       IN              int npages,                                             /* size in pages */
-       IN              int write_access,                               /* access rights */
-       OUT     struct scatterlist *sg                  /* s/g list */
-       )
-{
-       PMDL mdl_p;
-       int size = npages << PAGE_SHIFT;
-       int access = (write_access) ? IoWriteAccess : IoReadAccess;
-       int err;
-       void * kva;     /* kernel virtual address */
-
-       UNREFERENCED_PARAMETER(dev);
-       
-       MLX4_ENTER(MLX4_DBG_MEMORY);
-       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-       
-       /* allocate MDL */
-       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)start, (ULONG)size, 
-               FALSE,
-               FALSE,          /* not charge quota */
-               NULL);
-       if (mdl_p == NULL) {
-               err = -ENOMEM;  
-               goto err0;
-       }
-
-       /* lock memory */
-       __try   {       
-               MmProbeAndLockPages( mdl_p, UserMode,   access ); 
-       } 
-       __except (EXCEPTION_EXECUTE_HANDLER)
-       {
-               NTSTATUS Status = GetExceptionCode();
-               MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size));
-               switch(Status){
-                       case STATUS_WORKING_SET_QUOTA:
-                               err = -ENOMEM;break;
-                       case STATUS_ACCESS_VIOLATION:
-                               err = -EACCES;break;
-                       default :
-                               err = -EINVAL;
-                       }
-
-               goto err1;
-       }
-
-       /* map it to kernel */
-       kva = MmMapLockedPagesSpecifyCache( mdl_p, 
-               KernelMode, MmNonCached, 
-               NULL, FALSE, NormalPagePriority );
-       if (kva == NULL) {
-               MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n"));
-               err = -EFAULT;
-               goto err2;
-       }
-
-       sg->dma_addr.va = kva;
-       sg->dma_addr.sz = size;
-       sg->offset = (unsigned int)(start & ~PAGE_MASK);
-       sg->p_mdl = mdl_p;      
-       // TODO: has to be dma address, not physical one
-       sg->dma_addr.da = MmGetPhysicalAddress(kva).QuadPart;
-       return 0;       
-       
-err2:  
-       MmUnlockPages(mdl_p);
-err1:          
-       IoFreeMdl(mdl_p);
-err0:
-       MLX4_EXIT(MLX4_DBG_MEMORY);
-       return err;
-               
- }
-
-void put_page(struct scatterlist *sg)
-{
-       if (sg->p_mdl) {
-               MmUnmapLockedPages( sg->dma_addr.va, sg->p_mdl );
-               MmUnlockPages(sg->p_mdl);
-               IoFreeMdl(sg->p_mdl);
-       }
-}
-
-
-typedef struct _iobuf_seg {
-       LIST_ENTRY      link;
-       PMDL   mdl_p;
-       u64 va;  /* virtual address of the buffer */
-       u64 size;     /* size in bytes of the buffer */
-       u32 nr_pages;
-       int     is_user;
-} iobuf_seg_t;
-
-// Returns: 0 on success, -ENOMEM or -EACCESS on error
-static int register_segment(
-       IN              u64 va,
-       IN              u64 size,
-       IN              int is_user,
-       IN              enum ib_access_flags acc,
-       OUT iobuf_seg_t **iobuf_seg)
-{
-       PMDL mdl_p;
-       int rc;
-       KPROCESSOR_MODE mode;  
-       iobuf_seg_t * new_iobuf;
-       static ULONG cnt=0;
-       LOCK_OPERATION Operation;
-
-       // set Operation
-       if (acc & IB_ACCESS_LOCAL_WRITE)
-               Operation = IoModifyAccess;
-       else
-               Operation = IoReadAccess;
-       
-       // allocate IOBUF segment object
-       new_iobuf = (iobuf_seg_t *)kmalloc(sizeof(iobuf_seg_t), GFP_KERNEL );
-       if (new_iobuf == NULL) {
-               rc = -ENOMEM;
-               goto err_nomem;
-       }
-
-       // allocate MDL 
-       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL);
-       if (mdl_p == NULL) {
-               rc = -ENOMEM;
-               goto err_alloc_mdl;
-       }
-
-       // make context-dependent things
-       if (is_user) {
-               ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-               mode = UserMode;
-       }
-       else {  /* Mapping to kernel virtual address */
-               //    MmBuildMdlForNonPagedPool(mdl_p);   // fill MDL ??? - should we do that really ?
-               mode = KernelMode;
-       }
-
-       __try { /* try */
-               MmProbeAndLockPages( mdl_p, mode, Operation );   /* lock memory */
-       } /* try */
-               
-       __except (EXCEPTION_EXECUTE_HANDLER)    {
-               MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_MEMORY, 
-                       ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", 
-                       GetExceptionCode(), va, size));
-               rc = -EACCES;
-               goto err_probe;
-       }
-       
-       // fill IOBUF object
-       new_iobuf->va = va;
-       new_iobuf->size= size;
-       new_iobuf->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );
-       new_iobuf->mdl_p = mdl_p;
-       new_iobuf->is_user = is_user;
-       *iobuf_seg = new_iobuf;
-       return 0;
-
-err_probe:
-       IoFreeMdl(mdl_p);
-err_alloc_mdl:  
-       ExFreePool((PVOID)new_iobuf);
-err_nomem:  
-       return rc;
-}
-
-void iobuf_init(
-       IN              u64 va,
-       IN              u64 size,
-       IN              int is_user,
-       IN OUT  iobuf_t *iobuf_p)
-{
-       iobuf_p->va = va;
-       iobuf_p->size= size;
-       iobuf_p->is_user = is_user;
-       InitializeListHead( &iobuf_p->seg_que );
-       iobuf_p->seg_num = 0;
-       iobuf_p->nr_pages = 0;
-       iobuf_p->is_cashed = 0;
-}
-
-int iobuf_register(
-       IN              u64 va,
-       IN              u64 size,
-       IN              int is_user,
-       IN              enum ib_access_flags acc,
-       IN OUT  iobuf_t *iobuf_p)
-{
-       int rc=0;
-       u64 seg_va;     // current segment start
-       u64 seg_size;   // current segment size
-       u64 rdc;                        // remain data counter - what is rest to lock
-       u64 delta;                              // he size of the last not full page of the first segment
-       iobuf_seg_t * new_iobuf;
-       unsigned page_size = PAGE_SIZE;
-
-// 32 - for any case  
-#define PFNS_IN_PAGE_SIZE_MDL          ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long))
-#define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL)     // 4MB  
-
-       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
-
-       // we'll try to register all at once.
-       seg_va = va;
-       seg_size = rdc = size;
-               
-       // allocate segments
-       while (rdc > 0) {
-               // map a segment
-               rc = register_segment(seg_va, seg_size, is_user, acc, &new_iobuf );
-
-               // success - move to another segment
-               if (!rc) {
-                       rdc -= seg_size;
-                       seg_va += seg_size;
-                       InsertTailList( &iobuf_p->seg_que, &new_iobuf->link );
-                       iobuf_p->seg_num++;
-                       // round the segment size to the next page boundary 
-                       delta = (seg_va + seg_size) & (page_size - 1);
-                       if (delta) {
-                               seg_size -= delta;
-                               seg_size += page_size;
-                       }
-                       if (seg_size > rdc)
-                               seg_size = rdc;
-                       continue;
-               }
-
-               // failure - too large a buffer: lessen it and try once more
-               if (rc == -ENOMEM) {
-                       // no where to lessen - too low memory
-                       if (seg_size <= MIN_IOBUF_SEGMENT_SIZE)
-                               break;
-                       // lessen the size
-                       seg_size >>= 1;
-                       // round the segment size to the next page boundary 
-                       delta = (seg_va + seg_size) & (page_size - 1);
-                       if (delta) {
-                               seg_size -= delta;
-                               seg_size += page_size;
-                       }
-                       if (seg_size > rdc)
-                               seg_size = rdc;
-                       continue;
-               }
-
-               // got unrecoverable error
-               break;
-       }
-
-       // SUCCESS
-       if (rc) 
-               iobuf_deregister( iobuf_p );
-       else     
-               iobuf_p->nr_pages += ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );
-
-       return rc;
-}
-
-
-static void __iobuf_copy(
-       IN OUT  iobuf_t *dst_iobuf_p,
-       IN              iobuf_t *src_iobuf_p
-       )
-{
-       int i;
-       iobuf_seg_t *iobuf_seg_p;
-       
-       *dst_iobuf_p = *src_iobuf_p;
-       InitializeListHead( &dst_iobuf_p->seg_que );
-       for (i=0; i<src_iobuf_p->seg_num; ++i) {
-               iobuf_seg_p = (iobuf_seg_t *)(PVOID)RemoveHeadList( &src_iobuf_p->seg_que );
-               InsertTailList( &dst_iobuf_p->seg_que, &iobuf_seg_p->link );
-       }
-}
-
-/* if the buffer to be registered overlaps a buffer, already registered, 
-       a race can happen between HCA, writing to the previously registered
-       buffer and the probing functions (MmProbeAndLockPages, MmSecureVirtualMemory),
-       used in the algorithm of memory registration.
-       To prevent the race we maintain reference counters for the physical pages, being registered, 
-       and register every physical page FOR THE WRITE ACCESS only once.*/
-
-int iobuf_register_with_cash(
-       IN              u64 vaddr,
-       IN              u64 size,
-       IN              int is_user,
-       IN OUT  enum ib_access_flags *acc_p,
-       IN OUT  iobuf_t *iobuf_p)
-{
-       int rc, pa_in;
-       iobuf_t sec_iobuf;
-       int i, page_in , page_out, page_in_total;
-       int nr_pages;
-       char *subregion_start, *va;
-       u64 subregion_size;
-       u64 rdc;                                        // remain data counter - what is rest to lock
-       u64 delta;                              // he size of the last not full page of the first segment
-       enum ib_access_flags acc;
-
-       mutex_lock(&g_pa_mutex);
-
-       // register memory for read access to bring pages into the memory
-       rc = iobuf_register( vaddr, size, is_user, 0, iobuf_p);
-
-       // on error or read access - exit
-       if (rc || !(*acc_p & IB_ACCESS_LOCAL_WRITE))
-               goto exit;
-
-       // re-register buffer with the correct access rights
-       iobuf_init( (u64)vaddr, size, is_user, &sec_iobuf );
-       nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( vaddr, size );
-       subregion_start = va = (char*)(ULONG_PTR)vaddr;
-       rdc = size;
-       pa_in = page_in = page_in_total = page_out = 0;
-
-       for (i=0; i<nr_pages; ++i, va+=PAGE_SIZE) {
-               // check whether a phys page is to be registered
-               PHYSICAL_ADDRESS pa = MmGetPhysicalAddress(va);
-               pa_in = pa_is_registered(pa.QuadPart);
-               if (pa_in) {
-                       ++page_in;
-                       ++page_in_total;
-               }
-               else
-                       ++page_out;
-
-               // check whether we get at the end of a subregion with the same rights wrt cash
-               if (page_in && page_out) {
-                       // prepare to registration of the subregion
-                       if (pa_in) {            // SUBREGION WITH WRITE ACCESS
-                               acc = IB_ACCESS_LOCAL_WRITE;
-                               subregion_size = (u64)page_out * PAGE_SIZE;
-                               page_out = 0;
-                       }
-                       else {          // SUBREGION WITH READ ACCESS
-                               acc = 0;
-                               subregion_size = (u64)page_in * PAGE_SIZE;
-                               page_in = 0;
-                       }
-                       
-                       // round the subregion size to the page boundary 
-                       delta = (u64)(subregion_start + subregion_size) & (PAGE_SIZE - 1);
-                       subregion_size -= delta;
-                       if (subregion_size > rdc)
-                               subregion_size = rdc;
-
-                       // register the subregion
-                       rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf);
-                       if (rc)
-                               goto cleanup;
-
-                       // prepare to the next loop
-                       rdc -= subregion_size;
-                       subregion_start +=subregion_size;
-               }
-       }
-
-       // prepare to registration of the subregion
-       if (pa_in) {            // SUBREGION WITH READ ACCESS
-               acc = 0;
-               subregion_size = (u64)page_in * PAGE_SIZE;
-       }
-       else {          // SUBREGION WITH WRITE ACCESS
-               acc = IB_ACCESS_LOCAL_WRITE;
-               subregion_size = (u64)page_out * PAGE_SIZE;
-       }
-       
-       // round the subregion size to the page boundary 
-       delta = (u64)(subregion_start + subregion_size) & (PAGE_SIZE - 1);
-       subregion_size -= delta;
-       if (subregion_size > rdc)
-               subregion_size = rdc;
-       
-       // register the subregion
-       rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf);
-       if (rc)
-               goto cleanup;
-
-       // cash phys pages
-       rc = pa_register(iobuf_p);
-       if (rc)
-               goto err_pa_reg;
-
-       // replace the iobuf
-       iobuf_deregister( iobuf_p );
-       sec_iobuf.is_cashed = TRUE;
-       __iobuf_copy( iobuf_p, &sec_iobuf );
-       
-       // buffer is a part of also registered buffer - change the rights 
-       if (page_in_total)
-               *acc_p &= ~IB_ACCESS_LOCAL_WRITE;
-
-       goto exit;
-       
-err_pa_reg:    
-       iobuf_deregister( &sec_iobuf );
-cleanup:
-       iobuf_deregister( iobuf_p );
-exit:  
-       mutex_unlock(&g_pa_mutex);
-       return rc;
-}
-
-static void deregister_segment(iobuf_seg_t * iobuf_seg_p)
-{
-       MmUnlockPages( iobuf_seg_p->mdl_p );    // unlock the buffer 
-       IoFreeMdl( iobuf_seg_p->mdl_p );        // free MDL
-       ExFreePool(iobuf_seg_p);
-}
-
-void iobuf_deregister(iobuf_t *iobuf_p)
-{
-       iobuf_seg_t *iobuf_seg_p;       // pointer to current segment object
-
-       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
-
-       // release segments
-       while (!IsListEmpty( &iobuf_p->seg_que )) {
-               iobuf_seg_p = (iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que );
-               deregister_segment(iobuf_seg_p);
-               iobuf_p->seg_num--;
-       }
-       ASSERT(iobuf_p->seg_num == 0);
-}
-
-void iobuf_deregister_with_cash(iobuf_t *iobuf_p)
-{
-       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-
-       mutex_lock(&g_pa_mutex);
-       if (iobuf_p->is_cashed)
-               pa_deregister(iobuf_p);
-       iobuf_deregister(iobuf_p);
-       mutex_unlock(&g_pa_mutex);
-}
-
-void iobuf_iter_init(
-       IN              iobuf_t *iobuf_p, 
-       IN OUT  iobuf_iter_t *iterator_p)
-{
-       iterator_p->seg_p = iobuf_p->seg_que.Flink;
-       iterator_p->pfn_ix = 0;
-}
-
-// the function returns phys addresses of the pages, also for the first page
-// if one wants to get the phys address of the buffer, one has to 
-// add the offset from the start of the page to the first phys address
-// Returns: the number of entries, filled in page_tbl_p
-// Returns 0  while at the end of list.
-uint32_t iobuf_get_tpt_seg(
-       IN              iobuf_t *iobuf_p, 
-       IN OUT  iobuf_iter_t *iterator_p,
-       IN              uint32_t n_pages_in, 
-       IN OUT  uint64_t *page_tbl_p )
-{
-       uint32_t i=0;   // has to be initialized here for a premature exit
-       iobuf_seg_t *seg_p;     // pointer to current segment object 
-       PPFN_NUMBER     pfn_p; 
-       uint32_t        pfn_ix; // index of PFN in PFN array of the current segment
-       uint64_t *pa_buf_p = page_tbl_p;
-
-       // prepare to the loop
-       seg_p = iterator_p->seg_p;      // first segment of the first iobuf
-       pfn_ix= iterator_p->pfn_ix;
-
-       // check, whether we at the end of the list
-       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que)
-               goto exit;
-       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix;
-
-       // pass along all the PFN arrays
-       for (; i < n_pages_in; i++, pa_buf_p++) {
-               // convert PFN to the physical address
-               *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT;
-       
-               // get to the next PFN 
-               if (++pfn_ix >= seg_p->nr_pages) {
-                       seg_p = (iobuf_seg_t*)seg_p->link.Flink;
-                       pfn_ix = 0;
-                       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) {
-                               i++;
-                               break;
-                       }
-                       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p );
-               }
-       }
-
-exit:
-       iterator_p->seg_p = seg_p;
-       iterator_p->pfn_ix = pfn_ix;
-       return i;
-}
-
-
+/*\r
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.\r
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.\r
+ * Copyright (c) 2005 Mellanox Technologies.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ *     Redistribution and use in source and binary forms, with or\r
+ *     without modification, are permitted provided that the following\r
+ *     conditions are met:\r
+ *\r
+ *      - Redistributions of source code must retain the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer.\r
+ *\r
+ *      - Redistributions in binary form must reproduce the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer in the documentation and/or other materials\r
+ *        provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id: mt_memory.c 2020 2007-05-01 09:29:10Z leonid $\r
+ */\r
+#include <mlx4_debug.h>\r
+#include "l2w.h"\r
+#include "pa_cash.h"\r
+#include "ib_verbs.h"\r
+\r
+#if defined (EVENT_TRACING)\r
+#ifdef offsetof\r
+#undef offsetof\r
+#endif\r
+#include "iobuf.tmh"\r
+#endif \r
+\r
+\r
+\r
+\r
+/*\r
+*      Function: map user buffer to kernel and lock it\r
+*\r
+*      Return: \r
+*/\r
+int get_user_pages(\r
+       IN              struct mlx4_dev *dev,   /* device */\r
+       IN              u64 start,                                                      /* address in user space */\r
+       IN              int npages,                                             /* size in pages */\r
+       IN              int write_access,                               /* access rights */\r
+       OUT     struct scatterlist *sg                  /* s/g list */\r
+       )\r
+{\r
+       PMDL mdl_p;\r
+       int size = npages << PAGE_SHIFT;\r
+       int access = (write_access) ? IoWriteAccess : IoReadAccess;\r
+       int err;\r
+       void * kva;     /* kernel virtual address */\r
+\r
+       UNREFERENCED_PARAMETER(dev);\r
+       \r
+       MLX4_ENTER(MLX4_DBG_MEMORY);\r
+       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);\r
+       \r
+       /* allocate MDL */\r
+       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)start, (ULONG)size, \r
+               FALSE,\r
+               FALSE,          /* not charge quota */\r
+               NULL);\r
+       if (mdl_p == NULL) {\r
+               err = -ENOMEM;  \r
+               goto err0;\r
+       }\r
+\r
+       /* lock memory */\r
+       __try   {       \r
+               MmProbeAndLockPages( mdl_p, UserMode,   access ); \r
+       } \r
+       __except (EXCEPTION_EXECUTE_HANDLER)\r
+       {\r
+               NTSTATUS Status = GetExceptionCode();\r
+               MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size));\r
+               switch(Status){\r
+                       case STATUS_WORKING_SET_QUOTA:\r
+                               err = -ENOMEM;break;\r
+                       case STATUS_ACCESS_VIOLATION:\r
+                               err = -EACCES;break;\r
+                       default :\r
+                               err = -EINVAL;\r
+                       }\r
+\r
+               goto err1;\r
+       }\r
+\r
+       /* map it to kernel */\r
+       kva = MmMapLockedPagesSpecifyCache( mdl_p, \r
+               KernelMode, MmNonCached, \r
+               NULL, FALSE, NormalPagePriority );\r
+       if (kva == NULL) {\r
+               MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n"));\r
+               err = -EFAULT;\r
+               goto err2;\r
+       }\r
+\r
+       sg->dma_addr.va = kva;\r
+       sg->dma_addr.sz = size;\r
+       sg->offset = (unsigned int)(start & ~PAGE_MASK);\r
+       sg->p_mdl = mdl_p;      \r
+       // TODO: has to be dma address, not physical one\r
+       sg->dma_addr.da = MmGetPhysicalAddress(kva).QuadPart;\r
+       return 0;       \r
+       \r
+err2:  \r
+       MmUnlockPages(mdl_p);\r
+err1:          \r
+       IoFreeMdl(mdl_p);\r
+err0:\r
+       MLX4_EXIT(MLX4_DBG_MEMORY);\r
+       return err;\r
+               \r
+ }\r
+\r
+void put_page(struct scatterlist *sg)\r
+{\r
+       if (sg->p_mdl) {\r
+               MmUnmapLockedPages( sg->dma_addr.va, sg->p_mdl );\r
+               MmUnlockPages(sg->p_mdl);\r
+               IoFreeMdl(sg->p_mdl);\r
+       }\r
+}\r
+\r
+\r
+typedef struct _iobuf_seg {\r
+       LIST_ENTRY      link;\r
+       PMDL   mdl_p;\r
+       u64 va;  /* virtual address of the buffer */\r
+       u64 size;     /* size in bytes of the buffer */\r
+       u32 nr_pages;\r
+       int     is_user;\r
+} iobuf_seg_t;\r
+\r
+// Returns: 0 on success, -ENOMEM or -EACCESS on error\r
+static int register_segment(\r
+       IN              u64 va,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN              enum ib_access_flags acc,\r
+       OUT iobuf_seg_t **iobuf_seg)\r
+{\r
+       PMDL mdl_p;\r
+       int rc;\r
+       KPROCESSOR_MODE mode;  \r
+       iobuf_seg_t * new_iobuf;\r
+       static ULONG cnt=0;\r
+       LOCK_OPERATION Operation;\r
+\r
+       // set Operation\r
+       if (acc & IB_ACCESS_LOCAL_WRITE)\r
+               Operation = IoModifyAccess;\r
+       else\r
+               Operation = IoReadAccess;\r
+       \r
+       // allocate IOBUF segment object\r
+       new_iobuf = (iobuf_seg_t *)kmalloc(sizeof(iobuf_seg_t), GFP_KERNEL );\r
+       if (new_iobuf == NULL) {\r
+               rc = -ENOMEM;\r
+               goto err_nomem;\r
+       }\r
+\r
+       // allocate MDL \r
+       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL);\r
+       if (mdl_p == NULL) {\r
+               rc = -ENOMEM;\r
+               goto err_alloc_mdl;\r
+       }\r
+\r
+       // make context-dependent things\r
+       if (is_user) {\r
+               ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);\r
+               mode = UserMode;\r
+       }\r
+       else {  /* Mapping to kernel virtual address */\r
+               //    MmBuildMdlForNonPagedPool(mdl_p);   // fill MDL ??? - should we do that really ?\r
+               mode = KernelMode;\r
+       }\r
+\r
+       __try { /* try */\r
+               MmProbeAndLockPages( mdl_p, mode, Operation );   /* lock memory */\r
+       } /* try */\r
+               \r
+       __except (EXCEPTION_EXECUTE_HANDLER)    {\r
+               MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_MEMORY, \r
+                       ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", \r
+                       GetExceptionCode(), va, size));\r
+               rc = -EACCES;\r
+               goto err_probe;\r
+       }\r
+       \r
+       // fill IOBUF object\r
+       new_iobuf->va = va;\r
+       new_iobuf->size= size;\r
+       new_iobuf->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );\r
+       new_iobuf->mdl_p = mdl_p;\r
+       new_iobuf->is_user = is_user;\r
+       *iobuf_seg = new_iobuf;\r
+       return 0;\r
+\r
+err_probe:\r
+       IoFreeMdl(mdl_p);\r
+err_alloc_mdl:  \r
+       ExFreePool((PVOID)new_iobuf);\r
+err_nomem:  \r
+       return rc;\r
+}\r
+\r
+void iobuf_init(\r
+       IN              u64 va,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN OUT  iobuf_t *iobuf_p)\r
+{\r
+       iobuf_p->va = va;\r
+       iobuf_p->size= size;\r
+       iobuf_p->is_user = is_user;\r
+       InitializeListHead( &iobuf_p->seg_que );\r
+       iobuf_p->seg_num = 0;\r
+       iobuf_p->nr_pages = 0;\r
+       iobuf_p->is_cashed = 0;\r
+}\r
+\r
+int iobuf_register(\r
+       IN              u64 va,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN              enum ib_access_flags acc,\r
+       IN OUT  iobuf_t *iobuf_p)\r
+{\r
+       int rc=0;\r
+       u64 seg_va;     // current segment start\r
+       u64 seg_size;   // current segment size\r
+       u64 rdc;                        // remain data counter - what is rest to lock\r
+       u64 delta;                              // he size of the last not full page of the first segment\r
+       iobuf_seg_t * new_iobuf;\r
+       unsigned page_size = PAGE_SIZE;\r
+\r
+// 32 - for any case  \r
+#define PFNS_IN_PAGE_SIZE_MDL          ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long))\r
+#define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL)     // 4MB  \r
+\r
+       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);\r
+\r
+       // we'll try to register all at once.\r
+       seg_va = va;\r
+       seg_size = rdc = size;\r
+               \r
+       // allocate segments\r
+       while (rdc > 0) {\r
+               // map a segment\r
+               rc = register_segment(seg_va, seg_size, is_user, acc, &new_iobuf );\r
+\r
+               // success - move to another segment\r
+               if (!rc) {\r
+                       rdc -= seg_size;\r
+                       seg_va += seg_size;\r
+                       InsertTailList( &iobuf_p->seg_que, &new_iobuf->link );\r
+                       iobuf_p->seg_num++;\r
+                       // round the segment size to the next page boundary \r
+                       delta = (seg_va + seg_size) & (page_size - 1);\r
+                       if (delta) {\r
+                               seg_size -= delta;\r
+                               seg_size += page_size;\r
+                       }\r
+                       if (seg_size > rdc)\r
+                               seg_size = rdc;\r
+                       continue;\r
+               }\r
+\r
+               // failure - too large a buffer: lessen it and try once more\r
+               if (rc == -ENOMEM) {\r
+                       // no where to lessen - too low memory\r
+                       if (seg_size <= MIN_IOBUF_SEGMENT_SIZE)\r
+                               break;\r
+                       // lessen the size\r
+                       seg_size >>= 1;\r
+                       // round the segment size to the next page boundary \r
+                       delta = (seg_va + seg_size) & (page_size - 1);\r
+                       if (delta) {\r
+                               seg_size -= delta;\r
+                               seg_size += page_size;\r
+                       }\r
+                       if (seg_size > rdc)\r
+                               seg_size = rdc;\r
+                       continue;\r
+               }\r
+\r
+               // got unrecoverable error\r
+               break;\r
+       }\r
+\r
+       // SUCCESS\r
+       if (rc) \r
+               iobuf_deregister( iobuf_p );\r
+       else     \r
+               iobuf_p->nr_pages += ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );\r
+\r
+       return rc;\r
+}\r
+\r
+\r
+static void __iobuf_copy(\r
+       IN OUT  iobuf_t *dst_iobuf_p,\r
+       IN              iobuf_t *src_iobuf_p\r
+       )\r
+{\r
+       int i;\r
+       iobuf_seg_t *iobuf_seg_p;\r
+       \r
+       *dst_iobuf_p = *src_iobuf_p;\r
+       InitializeListHead( &dst_iobuf_p->seg_que );\r
+       for (i=0; i<src_iobuf_p->seg_num; ++i) {\r
+               iobuf_seg_p = (iobuf_seg_t *)(PVOID)RemoveHeadList( &src_iobuf_p->seg_que );\r
+               InsertTailList( &dst_iobuf_p->seg_que, &iobuf_seg_p->link );\r
+       }\r
+}\r
+\r
+/* if the buffer to be registered overlaps a buffer, already registered, \r
+       a race can happen between HCA, writing to the previously registered\r
+       buffer and the probing functions (MmProbeAndLockPages, MmSecureVirtualMemory),\r
+       used in the algorithm of memory registration.\r
+       To prevent the race we maintain reference counters for the physical pages, being registered, \r
+       and register every physical page FOR THE WRITE ACCESS only once.*/\r
+\r
+int iobuf_register_with_cash(\r
+       IN              u64 vaddr,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN OUT  enum ib_access_flags *acc_p,\r
+       IN OUT  iobuf_t *iobuf_p)\r
+{\r
+       int rc, pa_in;\r
+       iobuf_t sec_iobuf;\r
+       int i, page_in , page_out, page_in_total;\r
+       int nr_pages;\r
+       char *subregion_start, *va;\r
+       u64 subregion_size;\r
+       u64 rdc;                                        // remain data counter - what is rest to lock\r
+       u64 delta;                              // he size of the last not full page of the first segment\r
+       enum ib_access_flags acc;\r
+\r
+       mutex_lock(&g_pa_mutex);\r
+\r
+       // register memory for read access to bring pages into the memory\r
+       rc = iobuf_register( vaddr, size, is_user, 0, iobuf_p);\r
+\r
+       // on error or read access - exit\r
+       if (rc || !(*acc_p & IB_ACCESS_LOCAL_WRITE))\r
+               goto exit;\r
+\r
+       // re-register buffer with the correct access rights\r
+       iobuf_init( (u64)vaddr, size, is_user, &sec_iobuf );\r
+       nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( vaddr, size );\r
+       subregion_start = va = (char*)(ULONG_PTR)vaddr;\r
+       rdc = size;\r
+       pa_in = page_in = page_in_total = page_out = 0;\r
+\r
+       for (i=0; i<nr_pages; ++i, va+=PAGE_SIZE) {\r
+               // check whether a phys page is to be registered\r
+               PHYSICAL_ADDRESS pa = MmGetPhysicalAddress(va);\r
+               pa_in = pa_is_registered(pa.QuadPart);\r
+               if (pa_in) {\r
+                       ++page_in;\r
+                       ++page_in_total;\r
+               }\r
+               else\r
+                       ++page_out;\r
+\r
+               // check whether we get at the end of a subregion with the same rights wrt cash\r
+               if (page_in && page_out) {\r
+                       // prepare to registration of the subregion\r
+                       if (pa_in) {            // SUBREGION WITH WRITE ACCESS\r
+                               acc = IB_ACCESS_LOCAL_WRITE;\r
+                               subregion_size = (u64)page_out * PAGE_SIZE;\r
+                               page_out = 0;\r
+                       }\r
+                       else {          // SUBREGION WITH READ ACCESS\r
+                               acc = 0;\r
+                               subregion_size = (u64)page_in * PAGE_SIZE;\r
+                               page_in = 0;\r
+                       }\r
+                       \r
+                       // round the subregion size to the page boundary \r
+                       delta = (ULONG_PTR)(subregion_start + subregion_size) & (PAGE_SIZE - 1);\r
+                       subregion_size -= delta;\r
+                       if (subregion_size > rdc)\r
+                               subregion_size = rdc;\r
+\r
+                       // register the subregion\r
+                       rc = iobuf_register( (ULONG_PTR)subregion_start, subregion_size, is_user, acc, &sec_iobuf);\r
+                       if (rc)\r
+                               goto cleanup;\r
+\r
+                       // prepare to the next loop\r
+                       rdc -= subregion_size;\r
+                       subregion_start +=subregion_size;\r
+               }\r
+       }\r
+\r
+       // prepare to registration of the subregion\r
+       if (pa_in) {            // SUBREGION WITH READ ACCESS\r
+               acc = 0;\r
+               subregion_size = (u64)page_in * PAGE_SIZE;\r
+       }\r
+       else {          // SUBREGION WITH WRITE ACCESS\r
+               acc = IB_ACCESS_LOCAL_WRITE;\r
+               subregion_size = (u64)page_out * PAGE_SIZE;\r
+       }\r
+       \r
+       // round the subregion size to the page boundary \r
+       delta = (ULONG_PTR)(subregion_start + subregion_size) & (PAGE_SIZE - 1);\r
+       subregion_size -= delta;\r
+       if (subregion_size > rdc)\r
+               subregion_size = rdc;\r
+       \r
+       // register the subregion\r
+       rc = iobuf_register( (ULONG_PTR)subregion_start, subregion_size, is_user, acc, &sec_iobuf);\r
+       if (rc)\r
+               goto cleanup;\r
+\r
+       // cash phys pages\r
+       rc = pa_register(iobuf_p);\r
+       if (rc)\r
+               goto err_pa_reg;\r
+\r
+       // replace the iobuf\r
+       iobuf_deregister( iobuf_p );\r
+       sec_iobuf.is_cashed = TRUE;\r
+       __iobuf_copy( iobuf_p, &sec_iobuf );\r
+       \r
+       // buffer is a part of also registered buffer - change the rights \r
+       if (page_in_total)\r
+               *acc_p &= ~IB_ACCESS_LOCAL_WRITE;\r
+\r
+       goto exit;\r
+       \r
+err_pa_reg:    \r
+       iobuf_deregister( &sec_iobuf );\r
+cleanup:\r
+       iobuf_deregister( iobuf_p );\r
+exit:  \r
+       mutex_unlock(&g_pa_mutex);\r
+       return rc;\r
+}\r
+\r
+static void deregister_segment(iobuf_seg_t * iobuf_seg_p)\r
+{\r
+       MmUnlockPages( iobuf_seg_p->mdl_p );    // unlock the buffer \r
+       IoFreeMdl( iobuf_seg_p->mdl_p );        // free MDL\r
+       ExFreePool(iobuf_seg_p);\r
+}\r
+\r
+void iobuf_deregister(iobuf_t *iobuf_p)\r
+{\r
+       iobuf_seg_t *iobuf_seg_p;       // pointer to current segment object\r
+\r
+       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);\r
+\r
+       // release segments\r
+       while (!IsListEmpty( &iobuf_p->seg_que )) {\r
+               iobuf_seg_p = (iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que );\r
+               deregister_segment(iobuf_seg_p);\r
+               iobuf_p->seg_num--;\r
+       }\r
+       ASSERT(iobuf_p->seg_num == 0);\r
+}\r
+\r
+void iobuf_deregister_with_cash(iobuf_t *iobuf_p)\r
+{\r
+       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);\r
+\r
+       mutex_lock(&g_pa_mutex);\r
+       if (iobuf_p->is_cashed)\r
+               pa_deregister(iobuf_p);\r
+       iobuf_deregister(iobuf_p);\r
+       mutex_unlock(&g_pa_mutex);\r
+}\r
+\r
+void iobuf_iter_init(\r
+       IN              iobuf_t *iobuf_p, \r
+       IN OUT  iobuf_iter_t *iterator_p)\r
+{\r
+       iterator_p->seg_p = iobuf_p->seg_que.Flink;\r
+       iterator_p->pfn_ix = 0;\r
+}\r
+\r
+// the function returns phys addresses of the pages, also for the first page\r
+// if one wants to get the phys address of the buffer, one has to \r
+// add the offset from the start of the page to the first phys address\r
+// Returns: the number of entries, filled in page_tbl_p\r
+// Returns 0  while at the end of list.\r
+uint32_t iobuf_get_tpt_seg(\r
+       IN              iobuf_t *iobuf_p, \r
+       IN OUT  iobuf_iter_t *iterator_p,\r
+       IN              uint32_t n_pages_in, \r
+       IN OUT  uint64_t *page_tbl_p )\r
+{\r
+       uint32_t i=0;   // has to be initialized here for a premature exit\r
+       iobuf_seg_t *seg_p;     // pointer to current segment object \r
+       PPFN_NUMBER     pfn_p; \r
+       uint32_t        pfn_ix; // index of PFN in PFN array of the current segment\r
+       uint64_t *pa_buf_p = page_tbl_p;\r
+\r
+       // prepare to the loop\r
+       seg_p = iterator_p->seg_p;      // first segment of the first iobuf\r
+       pfn_ix= iterator_p->pfn_ix;\r
+\r
+       // check, whether we at the end of the list\r
+       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que)\r
+               goto exit;\r
+       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix;\r
+\r
+       // pass along all the PFN arrays\r
+       for (; i < n_pages_in; i++, pa_buf_p++) {\r
+               // convert PFN to the physical address\r
+               *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT;\r
+       \r
+               // get to the next PFN \r
+               if (++pfn_ix >= seg_p->nr_pages) {\r
+                       seg_p = (iobuf_seg_t*)seg_p->link.Flink;\r
+                       pfn_ix = 0;\r
+                       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) {\r
+                               i++;\r
+                               break;\r
+                       }\r
+                       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p );\r
+               }\r
+       }\r
+\r
+exit:\r
+       iterator_p->seg_p = seg_p;\r
+       iterator_p->pfn_ix = pfn_ix;\r
+       return i;\r
+}\r
+\r
+\r
diff --git a/hw/mlx4/kernel/hca/mr.c b/hw/mlx4/kernel/hca/mr.c
index 32cda6e..b23b039 100644
@@ -84,8 +84,8 @@ mlnx_register_mr (
        }\r
 \r
        // register mr \r
-       p_ib_mr = ibv_reg_mr(p_ib_pd, (u64)(ULONG_PTR)(void*)p_mr_create->vaddr, \r
-               p_mr_create->length, (uint64_t)p_mr_create->vaddr, \r
+       p_ib_mr = ibv_reg_mr(p_ib_pd, (ULONG_PTR)p_mr_create->vaddr, \r
+               p_mr_create->length, (ULONG_PTR)p_mr_create->vaddr, \r
                to_qp_acl(p_mr_create->access_ctrl), um_call ? &umv_buf : NULL );\r
        if (IS_ERR(p_ib_mr)) {\r
                err = PTR_ERR(p_ib_mr);\r
diff --git a/hw/mthca/kernel/hca_memory.c b/hw/mthca/kernel/hca_memory.c
index fdb626f..4786c3d 100644
@@ -89,7 +89,7 @@ mlnx_register_mr (
        // register mr \r
        mr_p = ibv_reg_mr(ib_pd_p, map_qp_ibal_acl(p_mr_create->access_ctrl), \r
                p_mr_create->vaddr, p_mr_create->length, \r
-               (uint64_t)p_mr_create->vaddr, um_call, TRUE );\r
+               (ULONG_PTR)p_mr_create->vaddr, um_call, TRUE );\r
        if (IS_ERR(mr_p)) {\r
                err = PTR_ERR(mr_p);\r
                HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,\r
diff --git a/hw/mthca/kernel/mt_memory.c b/hw/mthca/kernel/mt_memory.c
index 868472a..5a8bd1c 100644
-/*
- * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id$
- */
- #include "hca_driver.h"
-#include "mthca_dev.h"
-#if defined (EVENT_TRACING)
-#ifdef offsetof
-#undef offsetof
-#endif
-#include "mt_memory.tmh"
-#endif 
-
-#include "mt_pa_cash.h"
-
-
-/*
-*      Function: map user buffer to kernel and lock it
-*
-*      Return: 
-*/
-int get_user_pages(
-       IN              struct mthca_dev *dev,  /* device */
-       IN              u64 start,                                                      /* address in user space */
-       IN              int npages,                                             /* size in pages */
-       IN              int write_access,                               /* access rights */
-       OUT     struct scatterlist *sg                  /* s/g list */
-       )
-{
-       PMDL mdl_p;
-       int size = npages << PAGE_SHIFT;
-       int access = (write_access) ? IoWriteAccess : IoReadAccess;
-       int err;
-       void * kva;     /* kernel virtual address */
-
-       UNREFERENCED_PARAMETER(dev);
-       
-       HCA_ENTER(HCA_DBG_MEMORY);
-       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-       
-       /* allocate MDL */
-       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)start, (ULONG)size, 
-               FALSE,
-               FALSE,          /* not charge quota */
-               NULL);
-       if (mdl_p == NULL) {
-               err = -ENOMEM;  
-               goto err0;
-       }
-
-       /* lock memory */
-       __try   {       
-               MmProbeAndLockPages( mdl_p, UserMode,   access ); 
-       } 
-       __except (EXCEPTION_EXECUTE_HANDLER)
-       {
-               NTSTATUS Status = GetExceptionCode();
-               HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size));
-               switch(Status){
-                       case STATUS_WORKING_SET_QUOTA:
-                               err = -ENOMEM;break;
-                       case STATUS_ACCESS_VIOLATION:
-                               err = -EACCES;break;
-                       default :
-                               err = -EINVAL;
-                       }
-
-               goto err1;
-       }
-
-       /* map it to kernel */
-       kva = MmMapLockedPagesSpecifyCache( mdl_p, 
-               KernelMode, MmNonCached, 
-               NULL, FALSE, NormalPagePriority );
-       if (kva == NULL) {
-               HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n"));
-               err = -EFAULT;
-               goto err2;
-       }
-
-       sg->page = kva;
-       sg->length = size;
-       sg->offset = (unsigned int)(start & ~PAGE_MASK);
-       sg->p_mdl = mdl_p;      
-       sg->dma_address = MmGetPhysicalAddress(kva).QuadPart;
-       return 0;       
-       
-err2:  
-       MmUnlockPages(mdl_p);
-err1:          
-    IoFreeMdl(mdl_p);
-err0:
-       HCA_EXIT(HCA_DBG_MEMORY);
-       return err;
-               
- }
-
-void put_page(struct scatterlist *sg)
-{
-       if (sg->p_mdl) {
-               MmUnmapLockedPages( sg->page, sg->p_mdl );
-               MmUnlockPages(sg->p_mdl);
-               IoFreeMdl(sg->p_mdl);
-       }
-}
-
-VOID
-  AdapterListControl(
-    IN PDEVICE_OBJECT  DeviceObject,
-    IN PIRP  Irp,
-    IN PSCATTER_GATHER_LIST  ScatterGather,
-    IN PVOID  Context
-    )
-{
-       struct scatterlist *p_sg = (struct scatterlist *)Context;
-
-       UNREFERENCED_PARAMETER(DeviceObject);
-       UNREFERENCED_PARAMETER(Irp);
-
-       // sanity checks
-       if (!ScatterGather || !Context) {
-               HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("AdapterListControl failed: invalid parameters\n"));
-               return;
-       }
-       if (ScatterGather->NumberOfElements > 1) {
-               HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected sg size; %d elements \n",
-                       ScatterGather->NumberOfElements ));
-       }
-       if (ScatterGather->Elements[0].Length != p_sg->length) {
-               HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected buffer size %#x (expected %#x) \n",
-                       ScatterGather->Elements[0].Length, p_sg->length ));
-       }
-
-       // results      
-       p_sg->dma_address = ScatterGather->Elements[0].Address.QuadPart;        // get logical address
-       p_sg->p_os_sg = ScatterGather;          // store sg list address for releasing
-       //NB: we do not flush the buffers by FlushAdapterBuffers(), because we don't really transfer data
-}
-
-/* Returns: the number of mapped sg elements */
-int pci_map_sg(struct mthca_dev *dev, 
-       struct scatterlist *sg,         int nents, int direction)
-{
-#ifndef USE_GET_SG_LIST
-
-       UNREFERENCED_PARAMETER(dev);
-       UNREFERENCED_PARAMETER(sg);
-       UNREFERENCED_PARAMETER(direction);
-
-       // mapping was performed in alloc_dma_mem
-       return nents;
-
-#else
-
-       int i;
-       NTSTATUS status;
-       hca_dev_ext_t *p_ext = dev->ext;
-       struct scatterlist *p_sg = sg;
-       KIRQL irql = KeRaiseIrqlToDpcLevel();
-
-       for (i=0; i<nents; ++i, ++p_sg) {
-               status =        p_ext->p_dma_adapter->DmaOperations->GetScatterGatherList( 
-                       p_ext->p_dma_adapter, p_ext->cl_ext.p_self_do, p_sg->p_mdl, p_sg->page, 
-                       p_sg->length, AdapterListControl, sg, (BOOLEAN)direction );
-               if (!NT_SUCCESS(status)) {
-                       HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("GetScatterGatherList failed %#x\n", status)));
-                       break;
-               }
-       }
-       KeLowerIrql(irql);
-       return i; /* i.e., we mapped all the entries */
-
-#endif 
-}
-
-/* Returns: the number of unmapped sg elements */
-int pci_unmap_sg(struct mthca_dev *dev, 
-       struct scatterlist *sg,         int nents, int direction)
-{
-#ifndef USE_GET_SG_LIST
-       
-               UNREFERENCED_PARAMETER(dev);
-               UNREFERENCED_PARAMETER(sg);
-               UNREFERENCED_PARAMETER(direction);
-               // mapping was performed in alloc_dma_mem
-               return nents;
-       
-#else
-
-       int i;
-       hca_dev_ext_t *p_ext = dev->ext;
-       struct scatterlist *p_sg = sg;
-       KIRQL irql = KeRaiseIrqlToDpcLevel();
-       void *p_os_sg = p_sg->p_os_sg;
-
-       for (i=0; i<nents; ++i, ++p_sg) {
-               if (p_os_sg)
-                       p_sg->p_os_sg = NULL;
-                       p_ext->p_dma_adapter->DmaOperations->PutScatterGatherList( 
-                               p_ext->p_dma_adapter, p_os_sg, (BOOLEAN)direction );
-       }
-       KeLowerIrql(irql);
-       return i; /* i.e., we mapped all the entries */
-
-#endif 
-}
-
-/* The function zeroes 'struct scatterlist' and then fills it with values.
- On error 'struct scatterlist' is returned zeroed */
-void *alloc_dma_mem(
-       IN              struct mthca_dev *dev, 
-       IN              unsigned long size,
-       OUT     struct scatterlist *p_sg)
-{
-       void *va;
-       DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter;
-
-#ifndef USE_GET_SG_LIST
-
-       PHYSICAL_ADDRESS  pa = {0};
-       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
-
-       RtlZeroMemory(p_sg,sizeof *p_sg);
-       if (!size)
-               return NULL;
-
-       va  = p_dma->DmaOperations->AllocateCommonBuffer(
-               p_dma, size, &pa, FALSE );
-       if (va) {
-               p_sg->length    = size;
-               p_sg->dma_address = pa.QuadPart;
-               p_sg->page = va;
-       }
-
-#else
-
-       int err;
-       PHYSICAL_ADDRESS la = {0}, ba = {0}, ha = {(u64)(-1I64)};
-       PMDL p_mdl;
-
-       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
-
-       RtlZeroMemory(p_sg,sizeof *p_sg);
-       if (!size)
-               return NULL;
-
-       // allocate memory
-       va = MmAllocateContiguousMemorySpecifyCache(
-               size, la, ha, ba, MmNonCached );
-       if (!va) {
-               HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size )));
-               goto err_alloc;
-       }
-
-       // allocate MDL 
-       p_mdl = IoAllocateMdl( va, size, FALSE, FALSE, NULL );
-       if (!p_mdl) {
-               HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size )));
-               goto err_mdl;
-       }
-       MmBuildMdlForNonPagedPool( p_mdl );
-
-       p_sg->p_mdl = p_mdl;
-       p_sg->length    = size;
-       p_sg->page = va;
-
-       goto end;
-
-err_mdl:
-       MmFreeContiguousMemory(va);
-       va = NULL;
-err_alloc:
-end:
-
-#endif
-
-       return va;
-}
-
-void free_dma_mem(
-       IN              struct mthca_dev *dev, 
-       IN              struct scatterlist *p_sg)
-{
-#ifndef USE_GET_SG_LIST
-
-       PHYSICAL_ADDRESS  pa;
-       DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter;
-
-       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
-
-       if (p_sg->length) {
-               pa.QuadPart = p_sg->dma_address;
-               p_dma->DmaOperations->FreeCommonBuffer( 
-                       p_dma, p_sg->length, pa, 
-                       p_sg->page, FALSE );
-       }
-
-#else
-
-       PMDL p_mdl = p_sg->p_mdl;
-       PVOID page = p_sg->page;
-
-       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
-       if (p_mdl) {
-               p_sg->p_mdl = NULL;
-               IoFreeMdl( p_mdl );
-       }
-       if (page) {
-               p_sg->page = NULL;
-               MmFreeContiguousMemory(page);   
-       }
-
-#endif
-}
-
-
-typedef struct _mt_iobuf_seg {
-       LIST_ENTRY      link;
-       PMDL   mdl_p;
-       u64 va;  /* virtual address of the buffer */
-       u64 size;     /* size in bytes of the buffer */
-       u32 nr_pages;
-       int     is_user;
-} mt_iobuf_seg_t;
-
-// Returns: 0 on success, -ENOMEM or -EACCES on error
-static int register_segment(
-       IN              u64 va,
-       IN              u64 size,
-       IN              int is_user,
-       IN              ib_access_t acc,
-       OUT mt_iobuf_seg_t **iobuf_seg)
-{
-       PMDL mdl_p;
-       int rc;
-       KPROCESSOR_MODE mode;  
-       mt_iobuf_seg_t * new_iobuf;
-       static ULONG cnt=0;
-       LOCK_OPERATION Operation;
-
-       // set Operation
-       if (acc & IB_AC_LOCAL_WRITE)
-               Operation = IoModifyAccess;
-       else
-               Operation = IoReadAccess;
-       
-       // allocate IOBUF segment object
-       new_iobuf = (mt_iobuf_seg_t *)kmalloc(sizeof(mt_iobuf_seg_t), GFP_KERNEL );
-       if (new_iobuf == NULL) {
-               rc = -ENOMEM;
-               goto err_nomem;
-       }
-
-       // allocate MDL 
-       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL);
-       if (mdl_p == NULL) {
-               rc = -ENOMEM;
-               goto err_alloc_mdl;
-       }
-
-       // make context-dependent things
-       if (is_user) {
-               ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-               mode = UserMode;
-       }
-       else {  /* Mapping to kernel virtual address */
-               //    MmBuildMdlForNonPagedPool(mdl_p);   // fill MDL ??? - should we do that really ?
-               mode = KernelMode;
-       }
-
-       __try { /* try */
-               MmProbeAndLockPages( mdl_p, mode, Operation );   /* lock memory */
-       } /* try */
-               
-       __except (EXCEPTION_EXECUTE_HANDLER)    {
-               HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, 
-                       ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", 
-                       GetExceptionCode(), va, size));
-               rc = -EACCES;
-               goto err_probe;
-       }
-       
-       // fill IOBUF object
-       new_iobuf->va = va;
-       new_iobuf->size= size;
-       new_iobuf->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );
-       new_iobuf->mdl_p = mdl_p;
-       new_iobuf->is_user = is_user;
-       *iobuf_seg = new_iobuf;
-       return 0;
-
-err_probe:
-       IoFreeMdl(mdl_p);
-err_alloc_mdl:  
-       ExFreePool((PVOID)new_iobuf);
-err_nomem:  
-       return rc;
-}
-
-void iobuf_init(
-       IN              u64 va,
-       IN              u64 size,
-       IN              int is_user,
-       IN OUT  mt_iobuf_t *iobuf_p)
-{
-       iobuf_p->va = va;
-       iobuf_p->size= size;
-       iobuf_p->is_user = is_user;
-       InitializeListHead( &iobuf_p->seg_que );
-       iobuf_p->seg_num = 0;
-       iobuf_p->nr_pages = 0;
-       iobuf_p->is_cashed = 0;
-}
-
-int iobuf_register(
-       IN              u64 va,
-       IN              u64 size,
-       IN              int is_user,
-       IN              ib_access_t acc,
-       IN OUT  mt_iobuf_t *iobuf_p)
-{
-       int rc=0;
-       u64 seg_va;     // current segment start
-       u64 seg_size;   // current segment size
-       u64 rdc;                        // remain data counter - what is rest to lock
-       u64 delta;                              // the size of the last, partially filled page of the first segment
-       mt_iobuf_seg_t * new_iobuf;
-       unsigned page_size = PAGE_SIZE;
-
-// 32 - for any case  
-#define PFNS_IN_PAGE_SIZE_MDL          ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long))
-#define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL)     // 4MB  
-
-       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
-
-       // we'll try to register all at once.
-       seg_va = va;
-       seg_size = rdc = size;
-               
-       // allocate segments
-       while (rdc > 0) {
-               // map a segment
-               rc = register_segment(seg_va, seg_size, is_user, acc, &new_iobuf );
-
-               // success - move to another segment
-               if (!rc) {
-                       rdc -= seg_size;
-                       seg_va += seg_size;
-                       InsertTailList( &iobuf_p->seg_que, &new_iobuf->link );
-                       iobuf_p->seg_num++;
-                       // round the segment size to the next page boundary 
-                       delta = (seg_va + seg_size) & (page_size - 1);
-                       if (delta) {
-                               seg_size -= delta;
-                               seg_size += page_size;
-                       }
-                       if (seg_size > rdc)
-                               seg_size = rdc;
-                       continue;
-               }
-
-               // failure - too large a buffer: lessen it and try once more
-               if (rc == -ENOMEM) {
-                       // nowhere left to shrink - not enough memory
-                       if (seg_size <= MIN_IOBUF_SEGMENT_SIZE)
-                               break;
-                       // lessen the size
-                       seg_size >>= 1;
-                       // round the segment size to the next page boundary 
-                       delta = (seg_va + seg_size) & (page_size - 1);
-                       if (delta) {
-                               seg_size -= delta;
-                               seg_size += page_size;
-                       }
-                       if (seg_size > rdc)
-                               seg_size = rdc;
-                       continue;
-               }
-
-               // got unrecoverable error
-               break;
-       }
-
-       // SUCCESS
-       if (rc) 
-               iobuf_deregister( iobuf_p );
-       else     
-               iobuf_p->nr_pages += ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );
-
-       return rc;
-}
-
-
-static void __iobuf_copy(
-       IN OUT  mt_iobuf_t *dst_iobuf_p,
-       IN              mt_iobuf_t *src_iobuf_p
-       )
-{
-       int i;
-       mt_iobuf_seg_t *iobuf_seg_p;
-       
-       *dst_iobuf_p = *src_iobuf_p;
-       InitializeListHead( &dst_iobuf_p->seg_que );
-       for (i=0; i<src_iobuf_p->seg_num; ++i) {
-               iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveHeadList( &src_iobuf_p->seg_que );
-               InsertTailList( &dst_iobuf_p->seg_que, &iobuf_seg_p->link );
-       }
-}
-
-/* If the buffer to be registered overlaps an already registered buffer, 
-       a race can occur between the HCA, which may be writing to the previously
-       registered buffer, and the probing functions (MmProbeAndLockPages, MmSecureVirtualMemory)
-       used in the memory registration algorithm.
-       To prevent this race we maintain reference counters for the physical pages being registered, 
-       and register every physical page FOR WRITE ACCESS only once. */
-
-int iobuf_register_with_cash(
-       IN              u64 vaddr,
-       IN              u64 size,
-       IN              int is_user,
-       IN OUT  ib_access_t *acc_p,
-       IN OUT  mt_iobuf_t *iobuf_p)
-{
-       int rc, pa_in;
-       mt_iobuf_t sec_iobuf;
-       int i, page_in , page_out, page_in_total;
-       int nr_pages;
-       char *subregion_start, *va;
-       u64 subregion_size;
-       u64 rdc;                                        // remain data counter - what is rest to lock
-       u64 delta;                              // the size of the last, partially filled page of the first segment
-       ib_access_t acc;
-
-       down(&g_pa_mutex);
-
-       // register memory for read access to bring pages into the memory
-       rc = iobuf_register( vaddr, size, is_user, 0, iobuf_p);
-
-       // on error or read access - exit
-       if (rc || !(*acc_p & IB_AC_LOCAL_WRITE))
-               goto exit;
-
-       // re-register buffer with the correct access rights
-       iobuf_init( (u64)vaddr, size, is_user, &sec_iobuf );
-       nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( vaddr, size );
-       subregion_start = va = (char*)(ULONG_PTR)vaddr;
-       rdc = size;
-       pa_in = page_in = page_in_total = page_out = 0;
-
-       for (i=0; i<nr_pages; ++i, va+=PAGE_SIZE) {
-               // check whether a phys page is to be registered
-               PHYSICAL_ADDRESS pa = MmGetPhysicalAddress(va);
-               pa_in = pa_is_registered(pa.QuadPart);
-               if (pa_in) {
-                       ++page_in;
-                       ++page_in_total;
-               }
-               else
-                       ++page_out;
-
-               // check whether we have reached the end of a subregion with the same rights w.r.t. the cache
-               if (page_in && page_out) {
-                       // prepare to registration of the subregion
-                       if (pa_in) {            // SUBREGION WITH WRITE ACCESS
-                               acc = IB_AC_LOCAL_WRITE;
-                               subregion_size = (u64)page_out * PAGE_SIZE;
-                               page_out = 0;
-                       }
-                       else {          // SUBREGION WITH READ ACCESS
-                               acc = 0;
-                               subregion_size = (u64)page_in * PAGE_SIZE;
-                               page_in = 0;
-                       }
-                       
-                       // round the subregion size to the page boundary 
-                       delta = (u64)(subregion_start + subregion_size) & (PAGE_SIZE - 1);
-                       subregion_size -= delta;
-                       if (subregion_size > rdc)
-                               subregion_size = rdc;
-
-                       // register the subregion
-                       rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf);
-                       if (rc)
-                               goto cleanup;
-
-                       // prepare to the next loop
-                       rdc -= subregion_size;
-                       subregion_start +=subregion_size;
-               }
-       }
-
-       // prepare to registration of the subregion
-       if (pa_in) {            // SUBREGION WITH READ ACCESS
-               acc = 0;
-               subregion_size = (u64)page_in * PAGE_SIZE;
-       }
-       else {          // SUBREGION WITH WRITE ACCESS
-               acc = IB_AC_LOCAL_WRITE;
-               subregion_size = (u64)page_out * PAGE_SIZE;
-       }
-       
-       // round the subregion size to the page boundary 
-       delta = (u64)(subregion_start + subregion_size) & (PAGE_SIZE - 1);
-       subregion_size -= delta;
-       if (subregion_size > rdc)
-               subregion_size = rdc;
-       
-       // register the subregion
-       rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf);
-       if (rc)
-               goto cleanup;
-
-       // cache the phys pages
-       rc = pa_register(iobuf_p);
-       if (rc)
-               goto err_pa_reg;
-
-       // replace the iobuf
-       iobuf_deregister( iobuf_p );
-       sec_iobuf.is_cashed = TRUE;
-       __iobuf_copy( iobuf_p, &sec_iobuf );
-       
-       // buffer is a part of also registered buffer - change the rights 
-       if (page_in_total)
-               *acc_p = MTHCA_ACCESS_REMOTE_READ;
-
-       goto exit;
-       
-err_pa_reg:    
-       iobuf_deregister( &sec_iobuf );
-cleanup:
-       iobuf_deregister( iobuf_p );
-exit:  
-       up(&g_pa_mutex);
-       return rc;
-}
-
-static void deregister_segment(mt_iobuf_seg_t * iobuf_seg_p)
-{
-       MmUnlockPages( iobuf_seg_p->mdl_p );    // unlock the buffer 
-       IoFreeMdl( iobuf_seg_p->mdl_p );        // free MDL
-       ExFreePool(iobuf_seg_p);
-}
-
-void iobuf_deregister(mt_iobuf_t *iobuf_p)
-{
-       mt_iobuf_seg_t *iobuf_seg_p;    // pointer to current segment object
-
-       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
-
-       // release segments
-       while (!IsListEmpty( &iobuf_p->seg_que )) {
-               iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que );
-               deregister_segment(iobuf_seg_p);
-               iobuf_p->seg_num--;
-       }
-       ASSERT(iobuf_p->seg_num == 0);
-}
-
-void iobuf_deregister_with_cash(mt_iobuf_t *iobuf_p)
-{
-       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-
-       down(&g_pa_mutex);
-       if (iobuf_p->is_cashed)
-               pa_deregister(iobuf_p);
-        iobuf_deregister(iobuf_p);
-       up(&g_pa_mutex);
-}
-
-void iobuf_iter_init(
-       IN              mt_iobuf_t *iobuf_p, 
-       IN OUT  mt_iobuf_iter_t *iterator_p)
-{
-       iterator_p->seg_p = iobuf_p->seg_que.Flink;
-       iterator_p->pfn_ix = 0;
-}
-
-// The function returns the physical addresses of the pages, including the first page.
-// To get the physical address of the buffer itself, add the buffer's offset within
-// its first page to the first returned physical address.
-// Returns: the number of entries filled in page_tbl_p.
-// Returns 0 when the end of the list has been reached.
-uint32_t iobuf_get_tpt_seg(
-       IN              mt_iobuf_t *iobuf_p, 
-       IN OUT  mt_iobuf_iter_t *iterator_p,
-       IN              uint32_t n_pages_in, 
-       IN OUT  uint64_t *page_tbl_p )
-{
-       uint32_t i=0;   // has to be initialized here for a premature exit
-       mt_iobuf_seg_t *seg_p;  // pointer to current segment object 
-       PPFN_NUMBER     pfn_p; 
-       uint32_t        pfn_ix; // index of PFN in PFN array of the current segment
-       uint64_t *pa_buf_p = page_tbl_p;
-
-       // prepare to the loop
-       seg_p = iterator_p->seg_p;      // first segment of the first iobuf
-       pfn_ix= iterator_p->pfn_ix;
-
-       // check whether we are at the end of the list
-       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que)
-               goto exit;
-       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix;
-
-       // pass along all the PFN arrays
-       for (; i < n_pages_in; i++, pa_buf_p++) {
-               // convert PFN to the physical address
-               *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT;
-       
-               // get to the next PFN 
-               if (++pfn_ix >= seg_p->nr_pages) {
-                       seg_p = (mt_iobuf_seg_t*)seg_p->link.Flink;
-                       pfn_ix = 0;
-                       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) {
-                               i++;
-                               break;
-                       }
-                       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p );
-               }
-       }
-
-exit:
-       iterator_p->seg_p = seg_p;
-       iterator_p->pfn_ix = pfn_ix;
-       return i;
-}
-
-
-
-
+/*\r
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.\r
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.\r
+ * Copyright (c) 2005 Mellanox Technologies.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
+ *\r
+ * This software is available to you under a choice of one of two\r
+ * licenses.  You may choose to be licensed under the terms of the GNU\r
+ * General Public License (GPL) Version 2, available from the file\r
+ * COPYING in the main directory of this source tree, or the\r
+ * OpenIB.org BSD license below:\r
+ *\r
+ *     Redistribution and use in source and binary forms, with or\r
+ *     without modification, are permitted provided that the following\r
+ *     conditions are met:\r
+ *\r
+ *      - Redistributions of source code must retain the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer.\r
+ *\r
+ *      - Redistributions in binary form must reproduce the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer in the documentation and/or other materials\r
+ *        provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id$\r
+ */\r
+ #include "hca_driver.h"\r
+#include "mthca_dev.h"\r
+#if defined (EVENT_TRACING)\r
+#ifdef offsetof\r
+#undef offsetof\r
+#endif\r
+#include "mt_memory.tmh"\r
+#endif \r
+\r
+#include "mt_pa_cash.h"\r
+\r
+\r
+/*\r
+*      Function: map user buffer to kernel and lock it\r
+*\r
+*      Return: \r
+*/\r
+int get_user_pages(\r
+       IN              struct mthca_dev *dev,  /* device */\r
+       IN              u64 start,                                                      /* address in user space */\r
+       IN              int npages,                                             /* size in pages */\r
+       IN              int write_access,                               /* access rights */\r
+       OUT     struct scatterlist *sg                  /* s/g list */\r
+       )\r
+{\r
+       PMDL mdl_p;\r
+       int size = npages << PAGE_SHIFT;\r
+       int access = (write_access) ? IoWriteAccess : IoReadAccess;\r
+       int err;\r
+       void * kva;     /* kernel virtual address */\r
+\r
+       UNREFERENCED_PARAMETER(dev);\r
+       \r
+       HCA_ENTER(HCA_DBG_MEMORY);\r
+       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);\r
+       \r
+       /* allocate MDL */\r
+       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)start, (ULONG)size, \r
+               FALSE,\r
+               FALSE,          /* not charge quota */\r
+               NULL);\r
+       if (mdl_p == NULL) {\r
+               err = -ENOMEM;  \r
+               goto err0;\r
+       }\r
+\r
+       /* lock memory */\r
+       __try   {       \r
+               MmProbeAndLockPages( mdl_p, UserMode,   access ); \r
+       } \r
+       __except (EXCEPTION_EXECUTE_HANDLER)\r
+       {\r
+               NTSTATUS Status = GetExceptionCode();\r
+               HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size));\r
+               switch(Status){\r
+                       case STATUS_WORKING_SET_QUOTA:\r
+                               err = -ENOMEM;break;\r
+                       case STATUS_ACCESS_VIOLATION:\r
+                               err = -EACCES;break;\r
+                       default :\r
+                               err = -EINVAL;\r
+                       }\r
+\r
+               goto err1;\r
+       }\r
+\r
+       /* map it to kernel */\r
+       kva = MmMapLockedPagesSpecifyCache( mdl_p, \r
+               KernelMode, MmNonCached, \r
+               NULL, FALSE, NormalPagePriority );\r
+       if (kva == NULL) {\r
+               HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n"));\r
+               err = -EFAULT;\r
+               goto err2;\r
+       }\r
+\r
+       sg->page = kva;\r
+       sg->length = size;\r
+       sg->offset = (unsigned int)(start & ~PAGE_MASK);\r
+       sg->p_mdl = mdl_p;      \r
+       sg->dma_address = MmGetPhysicalAddress(kva).QuadPart;\r
+       return 0;       \r
+       \r
+err2:  \r
+       MmUnlockPages(mdl_p);\r
+err1:          \r
+    IoFreeMdl(mdl_p);\r
+err0:\r
+       HCA_EXIT(HCA_DBG_MEMORY);\r
+       return err;\r
+               \r
+ }\r
+\r
+void put_page(struct scatterlist *sg)\r
+{\r
+       if (sg->p_mdl) {\r
+               MmUnmapLockedPages( sg->page, sg->p_mdl );\r
+               MmUnlockPages(sg->p_mdl);\r
+               IoFreeMdl(sg->p_mdl);\r
+       }\r
+}\r
+\r
+VOID\r
+  AdapterListControl(\r
+    IN PDEVICE_OBJECT  DeviceObject,\r
+    IN PIRP  Irp,\r
+    IN PSCATTER_GATHER_LIST  ScatterGather,\r
+    IN PVOID  Context\r
+    )\r
+{\r
+       struct scatterlist *p_sg = (struct scatterlist *)Context;\r
+\r
+       UNREFERENCED_PARAMETER(DeviceObject);\r
+       UNREFERENCED_PARAMETER(Irp);\r
+\r
+       // sanity checks\r
+       if (!ScatterGather || !Context) {\r
+               HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("AdapterListControl failed: invalid parameters\n"));\r
+               return;\r
+       }\r
+       if (ScatterGather->NumberOfElements > 1) {\r
+               HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected sg size; %d elements \n",\r
+                       ScatterGather->NumberOfElements ));\r
+       }\r
+       if (ScatterGather->Elements[0].Length != p_sg->length) {\r
+               HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected buffer size %#x (expected %#x) \n",\r
+                       ScatterGather->Elements[0].Length, p_sg->length ));\r
+       }\r
+\r
+       // results      \r
+       p_sg->dma_address = ScatterGather->Elements[0].Address.QuadPart;        // get logical address\r
+       p_sg->p_os_sg = ScatterGather;          // store sg list address for releasing\r
+       //NB: we do not flush the buffers by FlushAdapterBuffers(), because we don't really transfer data\r
+}\r
+\r
+/* Returns: the number of mapped sg elements */\r
+int pci_map_sg(struct mthca_dev *dev, \r
+       struct scatterlist *sg,         int nents, int direction)\r
+{\r
+#ifndef USE_GET_SG_LIST\r
+\r
+       UNREFERENCED_PARAMETER(dev);\r
+       UNREFERENCED_PARAMETER(sg);\r
+       UNREFERENCED_PARAMETER(direction);\r
+\r
+       // mapping was performed in alloc_dma_mem\r
+       return nents;\r
+\r
+#else\r
+\r
+       int i;\r
+       NTSTATUS status;\r
+       hca_dev_ext_t *p_ext = dev->ext;\r
+       struct scatterlist *p_sg = sg;\r
+       KIRQL irql = KeRaiseIrqlToDpcLevel();\r
+\r
+       for (i=0; i<nents; ++i, ++p_sg) {\r
+               status =        p_ext->p_dma_adapter->DmaOperations->GetScatterGatherList( \r
+                       p_ext->p_dma_adapter, p_ext->cl_ext.p_self_do, p_sg->p_mdl, p_sg->page, \r
+                       p_sg->length, AdapterListControl, sg, (BOOLEAN)direction );\r
+               if (!NT_SUCCESS(status)) {\r
+                       HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("GetScatterGatherList failed %#x\n", status));\r
+                       break;\r
+               }\r
+       }\r
+       KeLowerIrql(irql);\r
+       return i; /* i.e., we mapped all the entries */\r
+\r
+#endif \r
+}\r
+\r
+/* Returns: the number of unmapped sg elements */\r
+int pci_unmap_sg(struct mthca_dev *dev, \r
+       struct scatterlist *sg,         int nents, int direction)\r
+{\r
+#ifndef USE_GET_SG_LIST\r
+       \r
+               UNREFERENCED_PARAMETER(dev);\r
+               UNREFERENCED_PARAMETER(sg);\r
+               UNREFERENCED_PARAMETER(direction);\r
+               // mapping was performed in alloc_dma_mem\r
+               return nents;\r
+       \r
+#else\r
+\r
+       int i;\r
+       hca_dev_ext_t *p_ext = dev->ext;\r
+       struct scatterlist *p_sg = sg;\r
+       KIRQL irql = KeRaiseIrqlToDpcLevel();\r
+\r
+       for (i=0; i<nents; ++i, ++p_sg) {\r
+               void *p_os_sg = p_sg->p_os_sg;\r
+               if (p_os_sg) {\r
+                       p_sg->p_os_sg = NULL;\r
+                       p_ext->p_dma_adapter->DmaOperations->PutScatterGatherList( \r
+                               p_ext->p_dma_adapter, p_os_sg, (BOOLEAN)direction );\r
+               }\r
+       }\r
+       KeLowerIrql(irql);\r
+       return i; /* i.e., we unmapped all the entries */\r
+\r
+#endif \r
+}\r
+\r
+/* The function zeroes 'struct scatterlist' and then fills it with values.\r
+ On error 'struct scatterlist' is returned zeroed */\r
+void *alloc_dma_mem(\r
+       IN              struct mthca_dev *dev, \r
+       IN              unsigned long size,\r
+       OUT     struct scatterlist *p_sg)\r
+{\r
+       void *va;\r
+       DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter;\r
+\r
+#ifndef USE_GET_SG_LIST\r
+\r
+       PHYSICAL_ADDRESS  pa = {0};\r
+       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);\r
+\r
+       RtlZeroMemory(p_sg,sizeof *p_sg);\r
+       if (!size)\r
+               return NULL;\r
+\r
+       va  = p_dma->DmaOperations->AllocateCommonBuffer(\r
+               p_dma, size, &pa, FALSE );\r
+       if (va) {\r
+               p_sg->length    = size;\r
+               p_sg->dma_address = pa.QuadPart;\r
+               p_sg->page = va;\r
+       }\r
+\r
+#else\r
+\r
+       int err;\r
+       PHYSICAL_ADDRESS la = {0}, ba = {0}, ha = {(u64)(-1I64)};\r
+       PMDL p_mdl;\r
+\r
+       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);\r
+\r
+       RtlZeroMemory(p_sg,sizeof *p_sg);\r
+       if (!size)\r
+               return NULL;\r
+\r
+       // allocate memory\r
+       va = MmAllocateContiguousMemorySpecifyCache(\r
+               size, la, ha, ba, MmNonCached );\r
+       if (!va) {\r
+               HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size ));\r
+               goto err_alloc;\r
+       }\r
+\r
+       // allocate MDL \r
+       p_mdl = IoAllocateMdl( va, size, FALSE, FALSE, NULL );\r
+       if (!p_mdl) {\r
+               HCA_PRINT(TRACE_LEVEL_ERROR   ,HCA_DBG_LOW   ,("IoAllocateMdl failed on %#x size\n", size ));\r
+               goto err_mdl;\r
+       }\r
+       MmBuildMdlForNonPagedPool( p_mdl );\r
+\r
+       p_sg->p_mdl = p_mdl;\r
+       p_sg->length    = size;\r
+       p_sg->page = va;\r
+\r
+       goto end;\r
+\r
+err_mdl:\r
+       MmFreeContiguousMemory(va);\r
+       va = NULL;\r
+err_alloc:\r
+end:\r
+\r
+#endif\r
+\r
+       return va;\r
+}\r
+\r
+void free_dma_mem(\r
+       IN              struct mthca_dev *dev, \r
+       IN              struct scatterlist *p_sg)\r
+{\r
+#ifndef USE_GET_SG_LIST\r
+\r
+       PHYSICAL_ADDRESS  pa;\r
+       DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter;\r
+\r
+       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);\r
+\r
+       if (p_sg->length) {\r
+               pa.QuadPart = p_sg->dma_address;\r
+               p_dma->DmaOperations->FreeCommonBuffer( \r
+                       p_dma, p_sg->length, pa, \r
+                       p_sg->page, FALSE );\r
+       }\r
+\r
+#else\r
+\r
+       PMDL p_mdl = p_sg->p_mdl;\r
+       PVOID page = p_sg->page;\r
+\r
+       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);\r
+       if (p_mdl) {\r
+               p_sg->p_mdl = NULL;\r
+               IoFreeMdl( p_mdl );\r
+       }\r
+       if (page) {\r
+               p_sg->page = NULL;\r
+               MmFreeContiguousMemory(page);   \r
+       }\r
+\r
+#endif\r
+}\r
+\r
+\r
+typedef struct _mt_iobuf_seg {\r
+       LIST_ENTRY      link;\r
+       PMDL   mdl_p;\r
+       u64 va;  /* virtual address of the buffer */\r
+       u64 size;     /* size in bytes of the buffer */\r
+       u32 nr_pages;\r
+       int     is_user;\r
+} mt_iobuf_seg_t;\r
+\r
+// Returns: 0 on success, -ENOMEM or -EACCES on error\r
+static int register_segment(\r
+       IN              u64 va,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN              ib_access_t acc,\r
+       OUT mt_iobuf_seg_t **iobuf_seg)\r
+{\r
+       PMDL mdl_p;\r
+       int rc;\r
+       KPROCESSOR_MODE mode;  \r
+       mt_iobuf_seg_t * new_iobuf;\r
+       static ULONG cnt=0;\r
+       LOCK_OPERATION Operation;\r
+\r
+       // set Operation\r
+       if (acc & IB_AC_LOCAL_WRITE)\r
+               Operation = IoModifyAccess;\r
+       else\r
+               Operation = IoReadAccess;\r
+       \r
+       // allocate IOBUF segment object\r
+       new_iobuf = (mt_iobuf_seg_t *)kmalloc(sizeof(mt_iobuf_seg_t), GFP_KERNEL );\r
+       if (new_iobuf == NULL) {\r
+               rc = -ENOMEM;\r
+               goto err_nomem;\r
+       }\r
+\r
+       // allocate MDL \r
+       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL);\r
+       if (mdl_p == NULL) {\r
+               rc = -ENOMEM;\r
+               goto err_alloc_mdl;\r
+       }\r
+\r
+       // make context-dependent things\r
+       if (is_user) {\r
+               ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);\r
+               mode = UserMode;\r
+       }\r
+       else {  /* Mapping to kernel virtual address */\r
+               //    MmBuildMdlForNonPagedPool(mdl_p);   // fill MDL ??? - should we do that really ?\r
+               mode = KernelMode;\r
+       }\r
+\r
+       __try { /* try */\r
+               MmProbeAndLockPages( mdl_p, mode, Operation );   /* lock memory */\r
+       } /* try */\r
+               \r
+       __except (EXCEPTION_EXECUTE_HANDLER)    {\r
+               HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, \r
+                       ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", \r
+                       GetExceptionCode(), va, size));\r
+               rc = -EACCES;\r
+               goto err_probe;\r
+       }\r
+       \r
+       // fill IOBUF object\r
+       new_iobuf->va = va;\r
+       new_iobuf->size= size;\r
+       new_iobuf->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );\r
+       new_iobuf->mdl_p = mdl_p;\r
+       new_iobuf->is_user = is_user;\r
+       *iobuf_seg = new_iobuf;\r
+       return 0;\r
+\r
+err_probe:\r
+       IoFreeMdl(mdl_p);\r
+err_alloc_mdl:  \r
+       ExFreePool((PVOID)new_iobuf);\r
+err_nomem:  \r
+       return rc;\r
+}\r
+\r
+void iobuf_init(\r
+       IN              u64 va,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN OUT  mt_iobuf_t *iobuf_p)\r
+{\r
+       iobuf_p->va = va;\r
+       iobuf_p->size= size;\r
+       iobuf_p->is_user = is_user;\r
+       InitializeListHead( &iobuf_p->seg_que );\r
+       iobuf_p->seg_num = 0;\r
+       iobuf_p->nr_pages = 0;\r
+       iobuf_p->is_cashed = 0;\r
+}\r
+\r
+int iobuf_register(\r
+       IN              u64 va,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN              ib_access_t acc,\r
+       IN OUT  mt_iobuf_t *iobuf_p)\r
+{\r
+       int rc=0;\r
+       u64 seg_va;     // current segment start\r
+       u64 seg_size;   // current segment size\r
+       u64 rdc;                        // remain data counter - what is rest to lock\r
+       u64 delta;                              // the size of the last, partially filled page of the first segment\r
+       mt_iobuf_seg_t * new_iobuf;\r
+       unsigned page_size = PAGE_SIZE;\r
+\r
+// 32 - for any case  \r
+#define PFNS_IN_PAGE_SIZE_MDL          ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long))\r
+#define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL)     // 4MB  \r
+\r
+       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);\r
+\r
+       // we'll try to register all at once.\r
+       seg_va = va;\r
+       seg_size = rdc = size;\r
+               \r
+       // allocate segments\r
+       while (rdc > 0) {\r
+               // map a segment\r
+               rc = register_segment(seg_va, seg_size, is_user, acc, &new_iobuf );\r
+\r
+               // success - move to another segment\r
+               if (!rc) {\r
+                       rdc -= seg_size;\r
+                       seg_va += seg_size;\r
+                       InsertTailList( &iobuf_p->seg_que, &new_iobuf->link );\r
+                       iobuf_p->seg_num++;\r
+                       // round the segment size to the next page boundary \r
+                       delta = (seg_va + seg_size) & (page_size - 1);\r
+                       if (delta) {\r
+                               seg_size -= delta;\r
+                               seg_size += page_size;\r
+                       }\r
+                       if (seg_size > rdc)\r
+                               seg_size = rdc;\r
+                       continue;\r
+               }\r
+\r
+               // failure - too large a buffer: lessen it and try once more\r
+               if (rc == -ENOMEM) {\r
+                       // nowhere left to shrink - not enough memory\r
+                       if (seg_size <= MIN_IOBUF_SEGMENT_SIZE)\r
+                               break;\r
+                       // lessen the size\r
+                       seg_size >>= 1;\r
+                       // round the segment size to the next page boundary \r
+                       delta = (seg_va + seg_size) & (page_size - 1);\r
+                       if (delta) {\r
+                               seg_size -= delta;\r
+                               seg_size += page_size;\r
+                       }\r
+                       if (seg_size > rdc)\r
+                               seg_size = rdc;\r
+                       continue;\r
+               }\r
+\r
+               // got unrecoverable error\r
+               break;\r
+       }\r
+\r
+       // SUCCESS\r
+       if (rc) \r
+               iobuf_deregister( iobuf_p );\r
+       else     \r
+               iobuf_p->nr_pages += ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );\r
+\r
+       return rc;\r
+}\r
+\r
+\r
+static void __iobuf_copy(\r
+       IN OUT  mt_iobuf_t *dst_iobuf_p,\r
+       IN              mt_iobuf_t *src_iobuf_p\r
+       )\r
+{\r
+       int i;\r
+       mt_iobuf_seg_t *iobuf_seg_p;\r
+       \r
+       *dst_iobuf_p = *src_iobuf_p;\r
+       InitializeListHead( &dst_iobuf_p->seg_que );\r
+       for (i=0; i<src_iobuf_p->seg_num; ++i) {\r
+               iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveHeadList( &src_iobuf_p->seg_que );\r
+               InsertTailList( &dst_iobuf_p->seg_que, &iobuf_seg_p->link );\r
+       }\r
+}\r
+\r
+/* If the buffer to be registered overlaps an already registered buffer, \r
+       a race can occur between the HCA, which may be writing to the previously\r
+       registered buffer, and the probing functions (MmProbeAndLockPages, MmSecureVirtualMemory)\r
+       used in the memory registration algorithm.\r
+       To prevent this race we maintain reference counters for the physical pages being registered, \r
+       and register every physical page FOR WRITE ACCESS only once. */\r
+\r
+int iobuf_register_with_cash(\r
+       IN              u64 vaddr,\r
+       IN              u64 size,\r
+       IN              int is_user,\r
+       IN OUT  ib_access_t *acc_p,\r
+       IN OUT  mt_iobuf_t *iobuf_p)\r
+{\r
+       int rc, pa_in;\r
+       mt_iobuf_t sec_iobuf;\r
+       int i, page_in , page_out, page_in_total;\r
+       int nr_pages;\r
+       char *subregion_start, *va;\r
+       u64 subregion_size;\r
+       u64 rdc;                                        // remain data counter - what is rest to lock\r
+       u64 delta;                              // the size of the last, partially filled page of the first segment\r
+       ib_access_t acc;\r
+\r
+       down(&g_pa_mutex);\r
+\r
+       // register memory for read access to bring pages into the memory\r
+       rc = iobuf_register( vaddr, size, is_user, 0, iobuf_p);\r
+\r
+       // on error or read access - exit\r
+       if (rc || !(*acc_p & IB_AC_LOCAL_WRITE))\r
+               goto exit;\r
+\r
+       // re-register buffer with the correct access rights\r
+       iobuf_init( (u64)vaddr, size, is_user, &sec_iobuf );\r
+       nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( vaddr, size );\r
+       subregion_start = va = (char*)(ULONG_PTR)vaddr;\r
+       rdc = size;\r
+       pa_in = page_in = page_in_total = page_out = 0;\r
+\r
+       for (i=0; i<nr_pages; ++i, va+=PAGE_SIZE) {\r
+               // check whether a phys page is to be registered\r
+               PHYSICAL_ADDRESS pa = MmGetPhysicalAddress(va);\r
+               pa_in = pa_is_registered(pa.QuadPart);\r
+               if (pa_in) {\r
+                       ++page_in;\r
+                       ++page_in_total;\r
+               }\r
+               else\r
+                       ++page_out;\r
+\r
+               // check whether we have reached the end of a subregion with the same rights w.r.t. the cache\r
+               if (page_in && page_out) {\r
+                       // prepare to registration of the subregion\r
+                       if (pa_in) {            // SUBREGION WITH WRITE ACCESS\r
+                               acc = IB_AC_LOCAL_WRITE;\r
+                               subregion_size = (u64)page_out * PAGE_SIZE;\r
+                               page_out = 0;\r
+                       }\r
+                       else {          // SUBREGION WITH READ ACCESS\r
+                               acc = 0;\r
+                               subregion_size = (u64)page_in * PAGE_SIZE;\r
+                               page_in = 0;\r
+                       }\r
+                       \r
+                       // round the subregion size to the page boundary \r
+                       delta = (ULONG_PTR)(subregion_start + subregion_size) & (PAGE_SIZE - 1);\r
+                       subregion_size -= delta;\r
+                       if (subregion_size > rdc)\r
+                               subregion_size = rdc;\r
+\r
+                       // register the subregion\r
+                       rc = iobuf_register( (ULONG_PTR)subregion_start, subregion_size, is_user, acc, &sec_iobuf);\r
+                       if (rc)\r
+                               goto cleanup;\r
+\r
+                       // prepare to the next loop\r
+                       rdc -= subregion_size;\r
+                       subregion_start +=subregion_size;\r
+               }\r
+       }\r
+\r
+       // prepare to registration of the subregion\r
+       if (pa_in) {            // SUBREGION WITH READ ACCESS\r
+               acc = 0;\r
+               subregion_size = (u64)page_in * PAGE_SIZE;\r
+       }\r
+       else {          // SUBREGION WITH WRITE ACCESS\r
+               acc = IB_AC_LOCAL_WRITE;\r
+               subregion_size = (u64)page_out * PAGE_SIZE;\r
+       }\r
+       \r
+       // round the subregion size to the page boundary \r
+       delta = (ULONG_PTR)(subregion_start + subregion_size) & (PAGE_SIZE - 1);\r
+       subregion_size -= delta;\r
+       if (subregion_size > rdc)\r
+               subregion_size = rdc;\r
+       \r
+       // register the subregion\r
+       rc = iobuf_register( (ULONG_PTR)subregion_start, subregion_size, is_user, acc, &sec_iobuf);\r
+       if (rc)\r
+               goto cleanup;\r
+\r
+       // cache the phys pages\r
+       rc = pa_register(iobuf_p);\r
+       if (rc)\r
+               goto err_pa_reg;\r
+\r
+       // replace the iobuf\r
+       iobuf_deregister( iobuf_p );\r
+       sec_iobuf.is_cashed = TRUE;\r
+       __iobuf_copy( iobuf_p, &sec_iobuf );\r
+       \r
+       // buffer is a part of also registered buffer - change the rights \r
+       if (page_in_total)\r
+               *acc_p = MTHCA_ACCESS_REMOTE_READ;\r
+\r
+       goto exit;\r
+       \r
+err_pa_reg:    \r
+       iobuf_deregister( &sec_iobuf );\r
+cleanup:\r
+       iobuf_deregister( iobuf_p );\r
+exit:  \r
+       up(&g_pa_mutex);\r
+       return rc;\r
+}\r
+\r
+static void deregister_segment(mt_iobuf_seg_t * iobuf_seg_p)\r
+{\r
+       MmUnlockPages( iobuf_seg_p->mdl_p );    // unlock the buffer \r
+       IoFreeMdl( iobuf_seg_p->mdl_p );        // free MDL\r
+       ExFreePool(iobuf_seg_p);\r
+}\r
+\r
+void iobuf_deregister(mt_iobuf_t *iobuf_p)\r
+{\r
+       mt_iobuf_seg_t *iobuf_seg_p;    // pointer to current segment object\r
+\r
+       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);\r
+\r
+       // release segments\r
+       while (!IsListEmpty( &iobuf_p->seg_que )) {\r
+               iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que );\r
+               deregister_segment(iobuf_seg_p);\r
+               iobuf_p->seg_num--;\r
+       }\r
+       ASSERT(iobuf_p->seg_num == 0);\r
+}\r
+\r
+void iobuf_deregister_with_cash(mt_iobuf_t *iobuf_p)\r
+{\r
+       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);\r
+\r
+       down(&g_pa_mutex);\r
+       if (iobuf_p->is_cashed)\r
+               pa_deregister(iobuf_p);\r
+        iobuf_deregister(iobuf_p);\r
+       up(&g_pa_mutex);\r
+}\r
+\r
+void iobuf_iter_init(\r
+       IN              mt_iobuf_t *iobuf_p, \r
+       IN OUT  mt_iobuf_iter_t *iterator_p)\r
+{\r
+       iterator_p->seg_p = iobuf_p->seg_que.Flink;\r
+       iterator_p->pfn_ix = 0;\r
+}\r
+\r
+// The function returns the physical addresses of the pages, including the first page.\r
+// To get the physical address of the buffer itself, add the buffer's offset within\r
+// its first page to the first returned physical address.\r
+// Returns: the number of entries filled in page_tbl_p.\r
+// Returns 0 when the end of the list has been reached.\r
+uint32_t iobuf_get_tpt_seg(\r
+       IN              mt_iobuf_t *iobuf_p, \r
+       IN OUT  mt_iobuf_iter_t *iterator_p,\r
+       IN              uint32_t n_pages_in, \r
+       IN OUT  uint64_t *page_tbl_p )\r
+{\r
+       uint32_t i=0;   // has to be initialized here for a premature exit\r
+       mt_iobuf_seg_t *seg_p;  // pointer to current segment object \r
+       PPFN_NUMBER     pfn_p; \r
+       uint32_t        pfn_ix; // index of PFN in PFN array of the current segment\r
+       uint64_t *pa_buf_p = page_tbl_p;\r
+\r
+       // prepare to the loop\r
+       seg_p = iterator_p->seg_p;      // first segment of the first iobuf\r
+       pfn_ix= iterator_p->pfn_ix;\r
+\r
+       // check whether we are at the end of the list\r
+       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que)\r
+               goto exit;\r
+       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix;\r
+\r
+       // pass along all the PFN arrays\r
+       for (; i < n_pages_in; i++, pa_buf_p++) {\r
+               // convert PFN to the physical address\r
+               *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT;\r
+       \r
+               // get to the next PFN \r
+               if (++pfn_ix >= seg_p->nr_pages) {\r
+                       seg_p = (mt_iobuf_seg_t*)seg_p->link.Flink;\r
+                       pfn_ix = 0;\r
+                       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) {\r
+                               i++;\r
+                               break;\r
+                       }\r
+                       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p );\r
+               }\r
+       }\r
+\r
+exit:\r
+       iterator_p->seg_p = seg_p;\r
+       iterator_p->pfn_ix = pfn_ix;\r
+       return i;\r
+}\r
+\r
+\r
+\r
+\r
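
For reference, a minimal sketch of how the iobuf helpers defined above fit together. build_page_table() is a hypothetical caller invented for illustration (it is not part of this patch); it assumes the mt_iobuf_t and mt_iobuf_iter_t types and the functions from this file, and locks a user buffer with local-write access:

    static int build_page_table(u64 va, u64 size, mt_iobuf_t *iobuf_p,
            uint64_t *page_tbl, uint32_t max_pages)
    {
            mt_iobuf_iter_t iter;
            int rc;

            /* describe the buffer, then lock its pages (user buffer, local-write access) */
            iobuf_init( va, size, 1, iobuf_p );
            rc = iobuf_register( va, size, 1, IB_AC_LOCAL_WRITE, iobuf_p );
            if (rc)
                    return rc;      /* -ENOMEM or -EACCES */

            /* walk the locked segments and collect page-aligned physical addresses;
               the offset of va within its first page must be added by the caller */
            iobuf_iter_init( iobuf_p, &iter );
            return (int)iobuf_get_tpt_seg( iobuf_p, &iter, max_pages, page_tbl );
    }

The buffer stays locked until the caller releases it with iobuf_deregister(), mirroring how mthca_reg_virt_mr() in the next hunk keeps mr->iobuf registered for the lifetime of the memory region.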
index d0f4d76..60d8079 100644 (file)
@@ -1029,9 +1029,9 @@ static struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd,
 \r
        // try register the buffer\r
        iobuf_p = &mr->iobuf;\r
-       iobuf_init( (u64)vaddr, length, um_call, iobuf_p);\r
+       iobuf_init( (ULONG_PTR)vaddr, length, um_call, iobuf_p);\r
        ib_acc = (acc & ~MTHCA_ACCESS_REMOTE_READ) ? IB_AC_LOCAL_WRITE : 0;\r
-       err =  iobuf_register_with_cash( (u64)vaddr, length, um_call, \r
+       err =  iobuf_register_with_cash( (ULONG_PTR)vaddr, length, um_call, \r
                &ib_acc, iobuf_p );\r
        if (err)\r
                goto err_reg_mem;\r
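
The (ULONG_PTR) casts in this hunk, like those added in mt_memory.c above (e.g. (u64)subregion_start becoming (ULONG_PTR)subregion_start), follow one pattern: on a 32-bit build, casting a pointer directly to a 64-bit integer can sign-extend, whereas casting through the unsigned, pointer-sized ULONG_PTR zero-extends. A minimal sketch, using a made-up pointer value above the 2GB boundary:

    static void sign_extension_example(void)
    {
            void *p = (void*)(ULONG_PTR)0x80001000; /* hypothetical address above 2GB on x86 */

            u64 bad  = (u64)p;              /* 32-bit build: may sign-extend to 0xFFFFFFFF80001000 */
            u64 good = (u64)(ULONG_PTR)p;   /* zero-extends to 0x0000000080001000 on any build */

            (void)bad;                      /* only the casts matter for this sketch */
            (void)good;
    }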
index d742534..43d28e6 100644 (file)
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id$
- */
-
-#include <mt_l2w.h>
-#include "mlnx_uvp.h"
-#include "mlnx_uvp_doorbell.h"
-#include "mthca_wqe.h"
-#include "mlnx_ual_data.h"
-
-#if defined(EVENT_TRACING)
-#include "mlnx_uvp_qp.tmh"
-#endif
-
-static const uint8_t mthca_opcode[] = {
-       MTHCA_OPCODE_RDMA_WRITE,
-       MTHCA_OPCODE_RDMA_WRITE_IMM,
-       MTHCA_OPCODE_SEND,
-       MTHCA_OPCODE_SEND_IMM,
-       MTHCA_OPCODE_RDMA_READ,
-       MTHCA_OPCODE_ATOMIC_CS,
-       MTHCA_OPCODE_ATOMIC_FA
-};
-
-static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
-{
-       enum mthca_wr_opcode opcode = -1; //= wr->wr_type;
-
-       switch (wr->wr_type) {
-               case WR_SEND: 
-                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
-                       break;
-               case WR_RDMA_WRITE:     
-                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
-                       break;
-               case WR_RDMA_READ:              opcode = MTHCA_OPCODE_RDMA_READ; break;
-               case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break;
-               case WR_FETCH_ADD:                      opcode = MTHCA_OPCODE_ATOMIC_FA; break;
-               default:                                                opcode = MTHCA_OPCODE_INVALID;break;
-       }
-       return opcode;
-}
-
-
-static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr)
-{
-       net32_t *wqe = wqe_ptr;
-
-       (void) wqe;     /* avoid warning if mthca_dbg compiled away... */
-       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num));
-       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
-               , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
-       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
-               , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
-       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
-               , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
-       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
-               , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
-
-}
-static void *get_recv_wqe(struct mthca_qp *qp, int n)
-{
-       return qp->buf + (n << qp->rq.wqe_shift);
-}
-
-static void *get_send_wqe(struct mthca_qp *qp, int n)
-{
-       void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
-       UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,
-               ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",
-                wqe_addr, qp->buf, qp->send_wqe_offset, n, 
-               qp->sq.wqe_shift));
-       
-       return wqe_addr;
-}
-
-void mthca_init_qp_indices(struct mthca_qp *qp)
-{
-       qp->sq.next_ind  = 0;
-       qp->sq.last_comp = qp->sq.max - 1;
-       qp->sq.head      = 0;
-       qp->sq.tail      = 0;
-       qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);
-
-       qp->rq.next_ind  = 0;
-       qp->rq.last_comp = qp->rq.max - 1;
-       qp->rq.head      = 0;
-       qp->rq.tail      = 0;
-       qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
-}
-
-static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
-{
-       unsigned cur;
-
-       cur = wq->head - wq->tail;
-       if ((int)(cur + nreq) < wq->max)
-               return 0;
-
-       cl_spinlock_acquire(&cq->lock);
-       cur = wq->head - wq->tail;
-       cl_spinlock_release(&cq->lock);
-
-       return (int)(cur + nreq) >= wq->max;
-}
-
-
-int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
-                         struct _ib_send_wr **bad_wr)
-{
-       struct mthca_qp *qp = to_mqp(ibqp);
-       uint8_t *wqe;
-       uint8_t *prev_wqe;
-       int ret = 0;
-       int nreq;
-       int i;
-       int size;
-       int size0 = 0;
-       uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
-       int ind;
-       int op0 = 0;
-       enum ib_wr_opcode opcode;
-       
-       UVP_ENTER(UVP_DBG_QP);
-       cl_spinlock_acquire(&qp->sq.lock);
-
-       /* XXX check that state is OK to post send */
-
-       ind = qp->sq.next_ind;
-
-       if(ibqp->state == IBV_QPS_RESET) {
-               ret = -EBUSY;
-               if (bad_wr)
-                       *bad_wr = wr;
-               goto err_busy;
-       }
-
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-
-               if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"
-                                       " %d max, %d nreq)\n", ibqp->qp_num,
-                                       qp->sq.head, qp->sq.tail,
-                                       qp->sq.max, nreq));
-                       ret = -ENOMEM;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               wqe = get_send_wqe(qp, ind);
-               prev_wqe = qp->sq.last;
-               qp->sq.last = wqe;
-               opcode = conv_ibal_wr_opcode(wr);
-               if (opcode == MTHCA_OPCODE_INVALID) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
-                       ret = -EINVAL;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-
-               ((struct mthca_next_seg *) wqe)->nda_op = 0;
-               ((struct mthca_next_seg *) wqe)->ee_nds = 0;
-               ((struct mthca_next_seg *) wqe)->flags =
-                       ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
-                        cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
-                       ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
-                        cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
-                       cl_hton32(1);
-               if (opcode == MTHCA_OPCODE_SEND_IMM||
-                   opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
-
-               wqe += sizeof (struct mthca_next_seg);
-               size = sizeof (struct mthca_next_seg) / 16;
-
-
-               switch (ibqp->qp_type) {
-               case IB_QPT_RELIABLE_CONN:
-                       switch (opcode) {
-                       case MTHCA_OPCODE_ATOMIC_CS:
-                       case MTHCA_OPCODE_ATOMIC_FA:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cl_hton64(wr->remote_ops.vaddr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       wr->remote_ops.rkey;
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
-                               wqe += sizeof (struct mthca_raddr_seg);
-
-                               if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cl_hton64(wr->remote_ops.atomic2);
-                                       ((struct mthca_atomic_seg *) wqe)->compare =
-                                               cl_hton64(wr->remote_ops.atomic1);
-                               } else {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cl_hton64(wr->remote_ops.atomic1);
-                                       ((struct mthca_atomic_seg *) wqe)->compare = 0;
-                               }
-
-                               wqe += sizeof (struct mthca_atomic_seg);
-                               size += (sizeof (struct mthca_raddr_seg) +
-                                        sizeof (struct mthca_atomic_seg)) / 16;
-                               break;
-
-                       case MTHCA_OPCODE_RDMA_WRITE:
-                       case MTHCA_OPCODE_RDMA_WRITE_IMM:
-                       case MTHCA_OPCODE_RDMA_READ:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cl_hton64(wr->remote_ops.vaddr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       wr->remote_ops.rkey;
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
-                               size += sizeof (struct mthca_raddr_seg) / 16;
-                               break;
-
-                       default:
-                               /* No extra segments required for sends */
-                               break;
-                       }
-
-                       break;
-
-               case IB_QPT_UNRELIABLE_CONN:
-                       switch (opcode) {
-                       case MTHCA_OPCODE_RDMA_WRITE:
-                       case MTHCA_OPCODE_RDMA_WRITE_IMM:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cl_hton64(wr->remote_ops.vaddr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       wr->remote_ops.rkey;
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
-                               size += sizeof (struct mthca_raddr_seg) / 16;
-                               break;
-
-                       default:
-                               /* No extra segments required for sends */
-                               break;
-                       }
-
-                       break;
-
-               case IB_QPT_UNRELIABLE_DGRM:
-                       {
-                               struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
-                               ((struct mthca_tavor_ud_seg *) wqe)->lkey =
-                                       cl_hton32(ah->key);
-                               ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
-                                       cl_hton64((uint64_t)ah->av);
-                               ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
-                               ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
-
-                               wqe += sizeof (struct mthca_tavor_ud_seg);
-                               size += sizeof (struct mthca_tavor_ud_seg) / 16;
-                               break;
-                       }
-
-               default:
-                       break;
-               }
-
-               if ((int)(int)wr->num_ds > qp->sq.max_gs) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
-                       ret = -ERANGE;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-//TODO sleybo:
-               if (wr->send_opt & IB_SEND_OPT_INLINE) {
-                       if (wr->num_ds) {
-                               struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
-                               uint32_t s = 0;
-
-                               wqe += sizeof *seg;
-                               for (i = 0; i < (int)wr->num_ds; ++i) {
-                                       struct _ib_local_ds *sge = &wr->ds_array[i];
-
-                                       s += sge->length;
-
-                                       if (s > (uint32_t)qp->max_inline_data) {
-                                               ret = -1;
-                                               if (bad_wr)
-                                                       *bad_wr = wr;
-                                               goto out;
-                                       }
-
-                                       memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
-                                              sge->length);
-                                       wqe += sge->length;
-                               }
-
-                               seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
-                               size += align(s + sizeof *seg, 16) / 16;
-                       }
-               } else {
-                       for (i = 0; i < (int)wr->num_ds; ++i) {
-                               ((struct mthca_data_seg *) wqe)->byte_count =
-                                       cl_hton32(wr->ds_array[i].length);
-                               ((struct mthca_data_seg *) wqe)->lkey =
-                                       cl_hton32(wr->ds_array[i].lkey);
-                               ((struct mthca_data_seg *) wqe)->addr =
-                                       cl_hton64(wr->ds_array[i].vaddr);
-                               wqe += sizeof (struct mthca_data_seg);
-                               size += sizeof (struct mthca_data_seg) / 16;
-                       }
-               }
-
-               qp->wrid[ind + qp->rq.max] = wr->wr_id;
-
-               ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                       cl_hton32(((ind << qp->sq.wqe_shift) +
-                       qp->send_wqe_offset) |opcode);
-               
-               wmb();
-               
-               ((struct mthca_next_seg *) prev_wqe)->ee_nds =
-                       cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
-                       ((wr->send_opt& IB_SEND_OPT_FENCE) ?
-                        MTHCA_NEXT_FENCE : 0));
-
-               if (!size0) {
-                       size0 = size;
-                       op0   = opcode;
-               }
-               
-               dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp);
-               
-               ++ind;
-               if (unlikely(ind >= qp->sq.max))
-                       ind -= qp->sq.max;
-
-       }
-
-out:
-       if (likely(nreq)) {
-               uint32_t doorbell[2];
-
-               doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
-                       qp->send_wqe_offset) | f0 | op0);
-               doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
-
-               wmb();
-
-               mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
-       }
-
-       qp->sq.next_ind = ind;
-       qp->sq.head    += nreq;
-
-err_busy:
-       cl_spinlock_release(&qp->sq.lock);
-       
-       UVP_EXIT(UVP_DBG_QP);
-       return ret;
-}
-
-
-int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
-                         struct _ib_recv_wr **bad_wr)
-{
-       struct mthca_qp *qp = to_mqp(ibqp);
-       uint32_t doorbell[2];
-       int ret = 0;
-       int nreq;
-       int i;
-       int size;
-       int size0 = 0;
-       int ind;
-       uint8_t *wqe;
-       uint8_t *prev_wqe;
-       
-       UVP_ENTER(UVP_DBG_QP);
-       
-       cl_spinlock_acquire(&qp->rq.lock);
-
-       /* XXX check that state is OK to post receive */
-       
-       ind = qp->rq.next_ind;
-       if(ibqp->state == IBV_QPS_RESET) {
-               ret = -EBUSY;
-               if (bad_wr)
-                       *bad_wr = wr;
-               goto err_busy;
-       }
-       
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-               if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
-                       nreq = 0;
-
-                       doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-                       doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct 
-
-                       /*
-                        * Make sure that descriptors are written
-                        * before doorbell is rung.
-                        */
-                       mb();
-
-                       mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
-
-                       qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
-                       size0 = 0;
-               }
-
-               if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"
-                                       " %d max, %d nreq)\n", ibqp->qp_num,
-                                       qp->rq.head, qp->rq.tail,
-                                       qp->rq.max, nreq));
-                       ret = -ENOMEM;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               wqe = get_recv_wqe(qp, ind);
-               prev_wqe = qp->rq.last;
-               qp->rq.last = wqe;
-
-               ((struct mthca_next_seg *) wqe)->nda_op = 0;
-               ((struct mthca_next_seg *) wqe)->ee_nds =
-                       cl_hton32(MTHCA_NEXT_DBD);
-               ((struct mthca_next_seg *) wqe)->flags =
-                       cl_hton32(MTHCA_NEXT_CQ_UPDATE);
-
-               wqe += sizeof (struct mthca_next_seg);
-               size = sizeof (struct mthca_next_seg) / 16;
-
-               if (unlikely((int)wr->num_ds  > qp->rq.max_gs)) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));
-                       ret = -ERANGE;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               for (i = 0; i < (int)wr->num_ds; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               cl_hton32(wr->ds_array[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               cl_hton32(wr->ds_array[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               cl_hton64(wr->ds_array[i].vaddr);
-                       wqe += sizeof (struct mthca_data_seg);
-                       size += sizeof (struct mthca_data_seg) / 16;
-               }
-
-               qp->wrid[ind] = wr->wr_id;
-
-               ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                       cl_hton32((ind << qp->rq.wqe_shift) | 1);
-               wmb();
-               ((struct mthca_next_seg *) prev_wqe)->ee_nds =
-                       cl_hton32(MTHCA_NEXT_DBD | size);
-
-               if (!size0)
-                       size0 = size;
-
-               ++ind;
-               if (unlikely(ind >= qp->rq.max))
-                       ind -= qp->rq.max;
-       }
-
-out:
-       if (likely(nreq)) {
-               doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-               doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));
-
-               /*
-                * Make sure that descriptors are written before
-                * doorbell is rung.
-                */
-               mb();
-
-               mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
-       }
-
-       qp->rq.next_ind = ind;
-       qp->rq.head    += nreq;
-
-err_busy:
-       cl_spinlock_release(&qp->rq.lock);
-       UVP_EXIT(UVP_DBG_QP);
-       return ret;
-}
-
-int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
-                         struct _ib_send_wr **bad_wr)
-{
-       struct mthca_qp *qp = to_mqp(ibqp);
-       uint32_t doorbell[2];
-       uint8_t *wqe;
-       uint8_t *prev_wqe;
-       int ret = 0;
-       int nreq;       
-       int i;
-       int size;
-       int size0 = 0;
-       uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
-       int ind;
-       uint8_t op0 = 0;
-       enum ib_wr_opcode opcode;
-       
-       UVP_ENTER(UVP_DBG_QP);
-       
-       cl_spinlock_acquire(&qp->sq.lock);
-
-       /* XXX check that state is OK to post send */
-
-       ind = qp->sq.head & (qp->sq.max - 1);
-       if(ibqp->state == IBV_QPS_RESET) {
-               ret = -EBUSY;
-               if (bad_wr)
-                       *bad_wr = wr;
-               goto err_busy;
-       }
-
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-               if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
-                       nreq = 0;
-
-                       doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
-                                           ((qp->sq.head & 0xffff) << 8) | f0 | op0);
-                       doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
-                       qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
-                       size0 = 0;
-                       f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
-
-                       /*
-                        * Make sure that descriptors are written before
-                        * doorbell record.
-                        */
-                       wmb();
-                       *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
-
-                       /*
-                        * Make sure doorbell record is written before we
-                        * write MMIO send doorbell.
-                        */
-                       wmb();
-                       mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
-
-               }
-
-               if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"
-                                       " %d max, %d nreq)\n", ibqp->qp_num,
-                                       qp->sq.head, qp->sq.tail,
-                                       qp->sq.max, nreq));                     
-                       ret = -ENOMEM;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               wqe = get_send_wqe(qp, ind);
-               prev_wqe = qp->sq.last;
-               qp->sq.last = wqe;
-               opcode = conv_ibal_wr_opcode(wr);
-
-               ((struct mthca_next_seg *) wqe)->flags =
-                       ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
-                        cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
-                       ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
-                        cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
-                       cl_hton32(1);
-               if (opcode == MTHCA_OPCODE_SEND_IMM||
-                       opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
-
-               wqe += sizeof (struct mthca_next_seg);
-               size = sizeof (struct mthca_next_seg) / 16;
-
-               switch (ibqp->qp_type) {
-               case IB_QPT_RELIABLE_CONN:
-                       switch (opcode) {
-                       case MTHCA_OPCODE_ATOMIC_CS:
-                       case MTHCA_OPCODE_ATOMIC_FA:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cl_hton64(wr->remote_ops.vaddr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       wr->remote_ops.rkey;
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
-                               wqe += sizeof (struct mthca_raddr_seg);
-
-                               if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cl_hton64(wr->remote_ops.atomic2);
-                                       ((struct mthca_atomic_seg *) wqe)->compare =
-                                               cl_hton64(wr->remote_ops.atomic1);
-                               } else {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cl_hton64(wr->remote_ops.atomic1);
-                                       ((struct mthca_atomic_seg *) wqe)->compare = 0;
-                               }
-
-                               wqe += sizeof (struct mthca_atomic_seg);
-                               size += (sizeof (struct mthca_raddr_seg) +
-                                        sizeof (struct mthca_atomic_seg)) / 16;
-                               break;
-
-                       case MTHCA_OPCODE_RDMA_READ:
-                       case MTHCA_OPCODE_RDMA_WRITE:
-                       case MTHCA_OPCODE_RDMA_WRITE_IMM:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cl_hton64(wr->remote_ops.vaddr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       wr->remote_ops.rkey;
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
-                               size += sizeof (struct mthca_raddr_seg) / 16;
-                               break;
-
-                       default:
-                               /* No extra segments required for sends */
-                               break;
-                       }
-
-                       break;
-
-               case IB_QPT_UNRELIABLE_CONN:
-                       switch (opcode) {
-                       case MTHCA_OPCODE_RDMA_WRITE:
-                       case MTHCA_OPCODE_RDMA_WRITE_IMM:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cl_hton64(wr->remote_ops.vaddr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       wr->remote_ops.rkey;
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
-                               size += sizeof (struct mthca_raddr_seg) / 16;
-                               break;
-
-                       default:
-                               /* No extra segments required for sends */
-                               break;
-                       }
-
-                       break;
-
-               case IB_QPT_UNRELIABLE_DGRM:
-                       {
-                               struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
-                               memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
-                                      ah->av, sizeof ( struct mthca_av));
-                               ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
-                               ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
-
-
-                               wqe += sizeof (struct mthca_arbel_ud_seg);
-                               size += sizeof (struct mthca_arbel_ud_seg) / 16;
-                               break;
-                       }
-
-               default:
-                       break;
-               }
-
-               if ((int)wr->num_ds > qp->sq.max_gs) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x full too many gathers\n",ibqp->qp_num));
-                       ret = -ERANGE;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               if (wr->send_opt & IB_SEND_OPT_INLINE) {
-                       if (wr->num_ds) {
-                               struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
-                               uint32_t s = 0;
-
-                               wqe += sizeof *seg;
-                               for (i = 0; i < (int)wr->num_ds; ++i) {
-                                       struct _ib_local_ds *sge = &wr->ds_array[i];
-
-                                       s += sge->length;
-
-                                       if (s > (uint32_t)qp->max_inline_data) {
-                                               ret = -E2BIG;
-                                               if (bad_wr)
-                                                       *bad_wr = wr;
-                                               goto out;
-                                       }
-
-                                       memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
-                                              sge->length);
-                                       wqe += sge->length;
-                               }
-
-                               seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
-                               size += align(s + sizeof *seg, 16) / 16;
-                       }
-               } else {
-
-                       for (i = 0; i < (int)wr->num_ds; ++i) {
-                               ((struct mthca_data_seg *) wqe)->byte_count =
-                                       cl_hton32(wr->ds_array[i].length);
-                               ((struct mthca_data_seg *) wqe)->lkey =
-                                       cl_hton32(wr->ds_array[i].lkey);
-                               ((struct mthca_data_seg *) wqe)->addr =
-                                       cl_hton64(wr->ds_array[i].vaddr);
-                               wqe += sizeof (struct mthca_data_seg);
-                               size += sizeof (struct mthca_data_seg) / 16;
-                       }
-//TODO do this also in kernel
-//                     size += wr->num_ds * (sizeof *seg / 16);
-               }
-
-                       qp->wrid[ind + qp->rq.max] = wr->wr_id;
-
-               if (opcode == MTHCA_OPCODE_INVALID) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
-                       ret = -EINVAL;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                       cl_hton32(((ind << qp->sq.wqe_shift) +
-                              qp->send_wqe_offset) |
-                             opcode);
-               wmb();
-               ((struct mthca_next_seg *) prev_wqe)->ee_nds =
-                       cl_hton32(MTHCA_NEXT_DBD | size |
-                         ((wr->send_opt & IB_SEND_OPT_FENCE) ?
-                                                  MTHCA_NEXT_FENCE : 0));
-
-               if (!size0) {
-                       size0 = size;
-                       op0   = opcode;
-               }
-
-               ++ind;
-               if (unlikely(ind >= qp->sq.max))
-                       ind -= qp->sq.max;
-       }
-
-out:
-       if (likely(nreq)) {
-               doorbell[0] = cl_hton32((nreq << 24) |
-                                   ((qp->sq.head & 0xffff) << 8) | f0 | op0);
-               doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
-
-               qp->sq.head += nreq;
-
-               /*
-                * Make sure that descriptors are written before
-                * doorbell record.
-                */
-               wmb();
-               *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
-
-               /*
-                * Make sure doorbell record is written before we
-                * write MMIO send doorbell.
-                */
-               wmb();
-               mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
-       }
-
-err_busy:
-       cl_spinlock_release(&qp->sq.lock);
-
-       UVP_EXIT(UVP_DBG_QP);
-       
-       return ret;
-}
-
-int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
-                         struct _ib_recv_wr **bad_wr)
-{
-       struct mthca_qp *qp = to_mqp(ibqp);
-       int ret = 0;
-       int nreq;
-       int ind;
-       int i;
-       uint8_t *wqe;
-       
-       UVP_ENTER(UVP_DBG_QP);
-       
-       cl_spinlock_acquire(&qp->rq.lock);
-
-       /* XXX check that state is OK to post receive */
-
-       ind = qp->rq.head & (qp->rq.max - 1);
-       if(ibqp->state == IBV_QPS_RESET) {
-               ret = -EBUSY;
-               if (bad_wr)
-                       *bad_wr = wr;
-               goto err_busy;
-       }
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-               if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq
-                       UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"
-                                       " %d max, %d nreq)\n", ibqp->qp_num,
-                                       qp->rq.head, qp->rq.tail,
-                                       qp->rq.max, nreq));
-                       ret = -ENOMEM;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               wqe = get_recv_wqe(qp, ind);
-
-               ((struct mthca_next_seg *) wqe)->flags = 0;
-
-               wqe += sizeof (struct mthca_next_seg);
-
-               if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
-                       UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full too many scatter\n",ibqp->qp_num));
-                       ret = -ERANGE;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               for (i = 0; i < (int)wr->num_ds; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               cl_hton32(wr->ds_array[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               cl_hton32(wr->ds_array[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               cl_hton64(wr->ds_array[i].vaddr);
-                       wqe += sizeof (struct mthca_data_seg);
-               }
-
-               if (i < qp->rq.max_gs) {
-                       ((struct mthca_data_seg *) wqe)->byte_count = 0;
-                       ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
-                       ((struct mthca_data_seg *) wqe)->addr = 0;
-               }
-
-                       qp->wrid[ind] = wr->wr_id;
-
-               ++ind;
-               if (unlikely(ind >= qp->rq.max))
-                       ind -= qp->rq.max;
-       }
-out:
-       if (likely(nreq)) {
-               qp->rq.head += nreq;
-
-               /*
-                * Make sure that descriptors are written before
-                * doorbell record.
-                */
-               mb();
-               *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
-       }
-
-err_busy:
-       cl_spinlock_release(&qp->rq.lock);
-       
-       UVP_EXIT(UVP_DBG_QP);
-       
-       return ret;
-}
-
-int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
-                      ib_qp_type_t type, struct mthca_qp *qp)
-{
-       int size;
-       int max_sq_sge;
-
-       qp->rq.max_gs    = cap->max_recv_sge;
-       qp->sq.max_gs    = cap->max_send_sge;
-       max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
-                                sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
-       if (max_sq_sge < (int)cap->max_send_sge)
-               max_sq_sge = cap->max_send_sge;
-
-       qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
-       if (!qp->wrid)
-               return -1;
-
-       size = sizeof (struct mthca_next_seg) +
-               qp->rq.max_gs * sizeof (struct mthca_data_seg);
-
-       for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
-            qp->rq.wqe_shift++)
-               ; /* nothing */
-
-       size = max_sq_sge * sizeof (struct mthca_data_seg);
-       switch (type) {
-       case IB_QPT_UNRELIABLE_DGRM:
-               size += mthca_is_memfree(pd->context) ?
-                       sizeof (struct mthca_arbel_ud_seg) :
-                       sizeof (struct mthca_tavor_ud_seg);
-               break;
-
-       case IB_QPT_UNRELIABLE_CONN:
-               size += sizeof (struct mthca_raddr_seg);
-               break;
-
-       case IB_QPT_RELIABLE_CONN:
-               size += sizeof (struct mthca_raddr_seg);
-               /*
-                * An atomic op will require an atomic segment, a
-                * remote address segment and one scatter entry.
-                */
-               if (size < (sizeof (struct mthca_atomic_seg) +
-                           sizeof (struct mthca_raddr_seg) +
-                           sizeof (struct mthca_data_seg)))
-                       size = (sizeof (struct mthca_atomic_seg) +
-                               sizeof (struct mthca_raddr_seg) +
-                               sizeof (struct mthca_data_seg));
-               break;
-
-       default:
-               break;
-       }
-
-       /* Make sure that we have enough space for a bind request */
-       if (size < sizeof (struct mthca_bind_seg))
-               size = sizeof (struct mthca_bind_seg);
-
-       size += sizeof (struct mthca_next_seg);
-
-       for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
-               qp->sq.wqe_shift++)
-               ; /* nothing */
-
-               qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
-                       1 << qp->sq.wqe_shift);
-
-               qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);
-
-       if (posix_memalign(&qp->buf, g_page_size,
-               align(qp->buf_size, g_page_size))) {
-               cl_free(qp->wrid);
-               return -1;
-       }
-
-       memset(qp->buf, 0, qp->buf_size);
-
-       if (mthca_is_memfree(pd->context)) {
-               struct mthca_next_seg *next;
-               struct mthca_data_seg *scatter;
-               int i;
-               uint32_t sz;
-
-               sz = cl_hton32((sizeof (struct mthca_next_seg) +
-                           qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);
-
-               for (i = 0; i < qp->rq.max; ++i) {
-                       next = get_recv_wqe(qp, i);
-                       next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
-                                            qp->rq.wqe_shift);
-                       next->ee_nds = sz;
-
-                       for (scatter = (void *) (next + 1);
-                            (void *) scatter < (void *) ((char *)next + (uint32_t)(1 << qp->rq.wqe_shift));
-                            ++scatter)
-                               scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
-               }
-
-               for (i = 0; i < qp->sq.max; ++i) {
-                       next = get_send_wqe(qp, i);
-                       next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
-                                             qp->sq.wqe_shift) +
-                                            qp->send_wqe_offset);
-               }
-       }
-
-       qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
-       qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
-
-       return 0;
-}
-
-struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
-{
-       int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
-
-       if (ctx->qp_table[tind].refcnt)
-               return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
-       else
-               return NULL;
-}
-
-int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
-{
-       int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
-       int ret = 0;
-
-       WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
-
-       if (!ctx->qp_table[tind].refcnt) {
-               ctx->qp_table[tind].table = cl_malloc(
-                       (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
-               if (!ctx->qp_table[tind].table) {
-                       ret = -1;
-                       goto out;
-               }
-       }
-       ++ctx->qp_table[tind].refcnt;
-       ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
-
-out:
-       ReleaseMutex( ctx->qp_table_mutex );
-       return ret;
-}
-
-void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
-{
-       int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
-
-       WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
-
-       if (!--ctx->qp_table[tind].refcnt)
-               cl_free(ctx->qp_table[tind].table);
-       else
-               ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
-       
-       ReleaseMutex( ctx->qp_table_mutex );
-}
-
-int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
-                      int index, int *dbd, uint32_t *new_wqe)
-{
-       struct mthca_next_seg *next;
-
-       /*
-        * For SRQs, all WQEs generate a CQE, so we're always at the
-        * end of the doorbell chain.
-        */
-       if (qp->ibv_qp.srq) {
-               *new_wqe = 0;
-               return 0;
-       }
-
-       if (is_send)
-               next = get_send_wqe(qp, index);
-       else
-               next = get_recv_wqe(qp, index);
-
-       *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
-       if (next->ee_nds & cl_hton32(0x3f))
-               *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
-                       (next->ee_nds & cl_hton32(0x3f));
-       else
-               *new_wqe = 0;
-
-       return 0;
-}
-
+/*\r
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.\r
+ * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
+ *\r
+ * This software is available to you under a choice of one of two\r
+ * licenses.  You may choose to be licensed under the terms of the GNU\r
+ * General Public License (GPL) Version 2, available from the file\r
+ * COPYING in the main directory of this source tree, or the\r
+ * OpenIB.org BSD license below:\r
+ *\r
+ *     Redistribution and use in source and binary forms, with or\r
+ *     without modification, are permitted provided that the following\r
+ *     conditions are met:\r
+ *\r
+ *      - Redistributions of source code must retain the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer.\r
+ *\r
+ *      - Redistributions in binary form must reproduce the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer in the documentation and/or other materials\r
+ *        provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id$\r
+ */\r
+\r
+#include <mt_l2w.h>\r
+#include "mlnx_uvp.h"\r
+#include "mlnx_uvp_doorbell.h"\r
+#include "mthca_wqe.h"\r
+#include "mlnx_ual_data.h"\r
+\r
+#if defined(EVENT_TRACING)\r
+#include "mlnx_uvp_qp.tmh"\r
+#endif\r
+\r
+static const uint8_t mthca_opcode[] = {\r
+       MTHCA_OPCODE_RDMA_WRITE,\r
+       MTHCA_OPCODE_RDMA_WRITE_IMM,\r
+       MTHCA_OPCODE_SEND,\r
+       MTHCA_OPCODE_SEND_IMM,\r
+       MTHCA_OPCODE_RDMA_READ,\r
+       MTHCA_OPCODE_ATOMIC_CS,\r
+       MTHCA_OPCODE_ATOMIC_FA\r
+};\r
+\r
+static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)\r
+{\r
+       enum mthca_wr_opcode opcode = -1; //= wr->wr_type;\r
+\r
+       switch (wr->wr_type) {\r
+               case WR_SEND: \r
+                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;\r
+                       break;\r
+               case WR_RDMA_WRITE:     \r
+                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;\r
+                       break;\r
+               case WR_RDMA_READ:              opcode = MTHCA_OPCODE_RDMA_READ; break;\r
+               case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break;\r
+               case WR_FETCH_ADD:                      opcode = MTHCA_OPCODE_ATOMIC_FA; break;\r
+               default:                                                opcode = MTHCA_OPCODE_INVALID;break;\r
+       }\r
+       return opcode;\r
+}\r
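The switch above is the single point where IBAL work-request types are translated into mthca opcodes. A minimal sketch of how the post paths further down consume it, assuming only the headers already included in this file; the wrapper itself is illustrative and not part of the driver:

	/* Illustrative only: translate the IBAL WR and reject unknown types
	 * before any WQE bytes are written, mirroring the post_send checks. */
	static int example_translate_wr(struct _ib_send_wr *wr,
					enum mthca_wr_opcode *opcode)
	{
		*opcode = conv_ibal_wr_opcode(wr);
		if (*opcode == MTHCA_OPCODE_INVALID)
			return -EINVAL;	/* real callers also set *bad_wr = wr */
		return 0;
	}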
+\r
+\r
+static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr)\r
+{\r
+       net32_t *wqe = wqe_ptr;\r
+\r
+       (void) wqe;     /* avoid warning if mthca_dbg compiled away... */\r
+       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num));\r
+       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0\r
+               , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));\r
+       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4\r
+               , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));\r
+       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8\r
+               , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));\r
+       UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12\r
+               , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));\r
+\r
+}\r
+static void *get_recv_wqe(struct mthca_qp *qp, int n)\r
+{\r
+       return qp->buf + (n << qp->rq.wqe_shift);\r
+}\r
+\r
+static void *get_send_wqe(struct mthca_qp *qp, int n)\r
+{\r
+       void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);\r
+       UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,\r
+               ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",\r
+                wqe_addr, qp->buf, qp->send_wqe_offset, n, \r
+               qp->sq.wqe_shift));\r
+       \r
+       return wqe_addr;\r
+}\r
+\r
+void mthca_init_qp_indices(struct mthca_qp *qp)\r
+{\r
+       qp->sq.next_ind  = 0;\r
+       qp->sq.last_comp = qp->sq.max - 1;\r
+       qp->sq.head      = 0;\r
+       qp->sq.tail      = 0;\r
+       qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);\r
+\r
+       qp->rq.next_ind  = 0;\r
+       qp->rq.last_comp = qp->rq.max - 1;\r
+       qp->rq.head      = 0;\r
+       qp->rq.tail      = 0;\r
+       qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);\r
+}\r
+\r
+static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)\r
+{\r
+       unsigned cur;\r
+\r
+       cur = wq->head - wq->tail;\r
+       if ((int)(cur + nreq) < wq->max)\r
+               return 0;\r
+\r
+       cl_spinlock_acquire(&cq->lock);\r
+       cur = wq->head - wq->tail;\r
+       cl_spinlock_release(&cq->lock);\r
+\r
+       return (int)(cur + nreq) >= wq->max;\r
+}\r
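mthca_wq_overflow() is deliberately optimistic: it first compares the free-running head and tail counters without a lock, and only re-reads them under the CQ lock when the queue looks full, since completion processing (which advances wq->tail) runs under that lock. A worked example of the arithmetic, with hypothetical counter values:

	/* Hypothetical values; head and tail are free-running unsigned
	 * counters, so the subtraction stays correct even after they wrap. */
	unsigned head = 300, tail = 50, nreq = 8;
	int      max  = 256;
	unsigned cur  = head - tail;	/* 250 WQEs currently outstanding */
	/* (int)(cur + nreq) >= max  ->  258 >= 256: the post is refused with
	 * -ENOMEM and *bad_wr left pointing at the first unposted WR.      */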
+\r
+\r
+int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,\r
+                         struct _ib_send_wr **bad_wr)\r
+{\r
+       struct mthca_qp *qp = to_mqp(ibqp);\r
+       uint8_t *wqe;\r
+       uint8_t *prev_wqe;\r
+       int ret = 0;\r
+       int nreq;\r
+       int i;\r
+       int size;\r
+       int size0 = 0;\r
+       uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;\r
+       int ind;\r
+       int op0 = 0;\r
+       enum ib_wr_opcode opcode;\r
+       \r
+       UVP_ENTER(UVP_DBG_QP);\r
+       cl_spinlock_acquire(&qp->sq.lock);\r
+\r
+       /* XXX check that state is OK to post send */\r
+\r
+       ind = qp->sq.next_ind;\r
+\r
+       if(ibqp->state == IBV_QPS_RESET) {\r
+               ret = -EBUSY;\r
+               if (bad_wr)\r
+                       *bad_wr = wr;\r
+               goto err_busy;\r
+       }\r
+\r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+\r
+               if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"\r
+                                       " %d max, %d nreq)\n", ibqp->qp_num,\r
+                                       qp->sq.head, qp->sq.tail,\r
+                                       qp->sq.max, nreq));\r
+                       ret = -ENOMEM;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               wqe = get_send_wqe(qp, ind);\r
+               prev_wqe = qp->sq.last;\r
+               qp->sq.last = wqe;\r
+               opcode = conv_ibal_wr_opcode(wr);\r
+               if (opcode == MTHCA_OPCODE_INVALID) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));\r
+                       ret = -EINVAL;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+\r
+               ((struct mthca_next_seg *) wqe)->nda_op = 0;\r
+               ((struct mthca_next_seg *) wqe)->ee_nds = 0;\r
+               ((struct mthca_next_seg *) wqe)->flags =\r
+                       ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?\r
+                        cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |\r
+                       ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?\r
+                        cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |\r
+                       cl_hton32(1);\r
+               if (opcode == MTHCA_OPCODE_SEND_IMM||\r
+                   opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)\r
+                       ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;\r
+\r
+               wqe += sizeof (struct mthca_next_seg);\r
+               size = sizeof (struct mthca_next_seg) / 16;\r
+\r
+\r
+               switch (ibqp->qp_type) {\r
+               case IB_QPT_RELIABLE_CONN:\r
+                       switch (opcode) {\r
+                       case MTHCA_OPCODE_ATOMIC_CS:\r
+                       case MTHCA_OPCODE_ATOMIC_FA:\r
+                               ((struct mthca_raddr_seg *) wqe)->raddr =\r
+                                       cl_hton64(wr->remote_ops.vaddr);\r
+                               ((struct mthca_raddr_seg *) wqe)->rkey =\r
+                                       wr->remote_ops.rkey;\r
+                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
+\r
+                               wqe += sizeof (struct mthca_raddr_seg);\r
+\r
+                               if (opcode == MTHCA_OPCODE_ATOMIC_CS) {\r
+                                       ((struct mthca_atomic_seg *) wqe)->swap_add =\r
+                                               cl_hton64(wr->remote_ops.atomic2);\r
+                                       ((struct mthca_atomic_seg *) wqe)->compare =\r
+                                               cl_hton64(wr->remote_ops.atomic1);\r
+                               } else {\r
+                                       ((struct mthca_atomic_seg *) wqe)->swap_add =\r
+                                               cl_hton64(wr->remote_ops.atomic1);\r
+                                       ((struct mthca_atomic_seg *) wqe)->compare = 0;\r
+                               }\r
+\r
+                               wqe += sizeof (struct mthca_atomic_seg);\r
+                               size += (sizeof (struct mthca_raddr_seg) +\r
+                                        sizeof (struct mthca_atomic_seg)) / 16;\r
+                               break;\r
+\r
+                       case MTHCA_OPCODE_RDMA_WRITE:\r
+                       case MTHCA_OPCODE_RDMA_WRITE_IMM:\r
+                       case MTHCA_OPCODE_RDMA_READ:\r
+                               ((struct mthca_raddr_seg *) wqe)->raddr =\r
+                                       cl_hton64(wr->remote_ops.vaddr);\r
+                               ((struct mthca_raddr_seg *) wqe)->rkey =\r
+                                       wr->remote_ops.rkey;\r
+                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
+                               wqe += sizeof (struct mthca_raddr_seg);\r
+                               size += sizeof (struct mthca_raddr_seg) / 16;\r
+                               break;\r
+\r
+                       default:\r
+                               /* No extra segments required for sends */\r
+                               break;\r
+                       }\r
+\r
+                       break;\r
+\r
+               case IB_QPT_UNRELIABLE_CONN:\r
+                       switch (opcode) {\r
+                       case MTHCA_OPCODE_RDMA_WRITE:\r
+                       case MTHCA_OPCODE_RDMA_WRITE_IMM:\r
+                               ((struct mthca_raddr_seg *) wqe)->raddr =\r
+                                       cl_hton64(wr->remote_ops.vaddr);\r
+                               ((struct mthca_raddr_seg *) wqe)->rkey =\r
+                                       wr->remote_ops.rkey;\r
+                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
+                               wqe += sizeof (struct mthca_raddr_seg);\r
+                               size += sizeof (struct mthca_raddr_seg) / 16;\r
+                               break;\r
+\r
+                       default:\r
+                               /* No extra segments required for sends */\r
+                               break;\r
+                       }\r
+\r
+                       break;\r
+\r
+               case IB_QPT_UNRELIABLE_DGRM:\r
+                       {\r
+                               struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);\r
+                               ((struct mthca_tavor_ud_seg *) wqe)->lkey =\r
+                                       cl_hton32(ah->key);\r
+                               ((struct mthca_tavor_ud_seg *) wqe)->av_addr =\r
+                                       cl_hton64((ULONG_PTR)ah->av);\r
+                               ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;\r
+                               ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;\r
+\r
+                               wqe += sizeof (struct mthca_tavor_ud_seg);\r
+                               size += sizeof (struct mthca_tavor_ud_seg) / 16;\r
+                               break;\r
+                       }\r
+\r
+               default:\r
+                       break;\r
+               }\r
+\r
+               if ((int)wr->num_ds > qp->sq.max_gs) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));\r
+                       ret = -ERANGE;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+//TODO sleybo:\r
+               if (wr->send_opt & IB_SEND_OPT_INLINE) {\r
+                       if (wr->num_ds) {\r
+                               struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;\r
+                               uint32_t s = 0;\r
+\r
+                               wqe += sizeof *seg;\r
+                               for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                                       struct _ib_local_ds *sge = &wr->ds_array[i];\r
+\r
+                                       s += sge->length;\r
+\r
+                                       if (s > (uint32_t)qp->max_inline_data) {\r
+                                               ret = -1;\r
+                                               if (bad_wr)\r
+                                                       *bad_wr = wr;\r
+                                               goto out;\r
+                                       }\r
+\r
+                                       memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,\r
+                                              sge->length);\r
+                                       wqe += sge->length;\r
+                               }\r
+\r
+                               seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);\r
+                               size += align(s + sizeof *seg, 16) / 16;\r
+                       }\r
+               } else {\r
+                       for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                               ((struct mthca_data_seg *) wqe)->byte_count =\r
+                                       cl_hton32(wr->ds_array[i].length);\r
+                               ((struct mthca_data_seg *) wqe)->lkey =\r
+                                       cl_hton32(wr->ds_array[i].lkey);\r
+                               ((struct mthca_data_seg *) wqe)->addr =\r
+                                       cl_hton64(wr->ds_array[i].vaddr);\r
+                               wqe += sizeof (struct mthca_data_seg);\r
+                               size += sizeof (struct mthca_data_seg) / 16;\r
+                       }\r
+               }\r
+\r
+               qp->wrid[ind + qp->rq.max] = wr->wr_id;\r
+\r
+               ((struct mthca_next_seg *) prev_wqe)->nda_op =\r
+                       cl_hton32(((ind << qp->sq.wqe_shift) +\r
+                       qp->send_wqe_offset) |opcode);\r
+               \r
+               wmb();\r
+               \r
+               ((struct mthca_next_seg *) prev_wqe)->ee_nds =\r
+                       cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |\r
+                       ((wr->send_opt& IB_SEND_OPT_FENCE) ?\r
+                        MTHCA_NEXT_FENCE : 0));\r
+\r
+               if (!size0) {\r
+                       size0 = size;\r
+                       op0   = opcode;\r
+               }\r
+               \r
+               dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp);\r
+               \r
+               ++ind;\r
+               if (unlikely(ind >= qp->sq.max))\r
+                       ind -= qp->sq.max;\r
+\r
+       }\r
+\r
+out:\r
+       if (likely(nreq)) {\r
+               uint32_t doorbell[2];\r
+\r
+               doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +\r
+                       qp->send_wqe_offset) | f0 | op0);\r
+               doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);\r
+\r
+               wmb();\r
+\r
+               mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);\r
+       }\r
+\r
+       qp->sq.next_ind = ind;\r
+       qp->sq.head    += nreq;\r
+\r
+err_busy:\r
+       cl_spinlock_release(&qp->sq.lock);\r
+       \r
+       UVP_EXIT(UVP_DBG_QP);\r
+       return ret;\r
+}\r
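The single doorbell write at the end of mthca_tavor_post_send() covers the whole chain of WQEs posted in that call: only the first WQE's address, opcode and size go into the doorbell, and the remaining WQEs are reached by the HCA through the nda_op links written in the loop. A sketch of the two 32-bit doorbell words as assembled above (same variable names; layout as consumed by mthca_write64 in this file):

	/* doorbell[0]: address of the first WQE of this chain, OR'ed with the
	 * fence flag f0 and the opcode op0 captured from that first WQE.     */
	doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
		qp->send_wqe_offset) | f0 | op0);
	/* doorbell[1]: QP number in the upper 24 bits, size of the first WQE
	 * (in 16-byte units) in the low byte.                                */
	doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);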
+\r
+\r
+int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,\r
+                         struct _ib_recv_wr **bad_wr)\r
+{\r
+       struct mthca_qp *qp = to_mqp(ibqp);\r
+       uint32_t doorbell[2];\r
+       int ret = 0;\r
+       int nreq;\r
+       int i;\r
+       int size;\r
+       int size0 = 0;\r
+       int ind;\r
+       uint8_t *wqe;\r
+       uint8_t *prev_wqe;\r
+       \r
+       UVP_ENTER(UVP_DBG_QP);\r
+       \r
+       cl_spinlock_acquire(&qp->rq.lock);\r
+\r
+       /* XXX check that state is OK to post receive */\r
+       \r
+       ind = qp->rq.next_ind;\r
+       if(ibqp->state == IBV_QPS_RESET) {\r
+               ret = -EBUSY;\r
+               if (bad_wr)\r
+                       *bad_wr = wr;\r
+               goto err_busy;\r
+       }\r
+       \r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+               if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {\r
+                       nreq = 0;\r
+\r
+                       doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);\r
+                       doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct \r
+\r
+                       /*\r
+                        * Make sure that descriptors are written\r
+                        * before doorbell is rung.\r
+                        */\r
+                       mb();\r
+\r
+                       mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);\r
+\r
+                       qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;\r
+                       size0 = 0;\r
+               }\r
+\r
+               if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"\r
+                                       " %d max, %d nreq)\n", ibqp->qp_num,\r
+                                       qp->rq.head, qp->rq.tail,\r
+                                       qp->rq.max, nreq));\r
+                       ret = -ENOMEM;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               wqe = get_recv_wqe(qp, ind);\r
+               prev_wqe = qp->rq.last;\r
+               qp->rq.last = wqe;\r
+\r
+               ((struct mthca_next_seg *) wqe)->nda_op = 0;\r
+               ((struct mthca_next_seg *) wqe)->ee_nds =\r
+                       cl_hton32(MTHCA_NEXT_DBD);\r
+               ((struct mthca_next_seg *) wqe)->flags =\r
+                       cl_hton32(MTHCA_NEXT_CQ_UPDATE);\r
+\r
+               wqe += sizeof (struct mthca_next_seg);\r
+               size = sizeof (struct mthca_next_seg) / 16;\r
+\r
+               if (unlikely((int)wr->num_ds  > qp->rq.max_gs)) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));\r
+                       ret = -ERANGE;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                       ((struct mthca_data_seg *) wqe)->byte_count =\r
+                               cl_hton32(wr->ds_array[i].length);\r
+                       ((struct mthca_data_seg *) wqe)->lkey =\r
+                               cl_hton32(wr->ds_array[i].lkey);\r
+                       ((struct mthca_data_seg *) wqe)->addr =\r
+                               cl_hton64(wr->ds_array[i].vaddr);\r
+                       wqe += sizeof (struct mthca_data_seg);\r
+                       size += sizeof (struct mthca_data_seg) / 16;\r
+               }\r
+\r
+               qp->wrid[ind] = wr->wr_id;\r
+\r
+               ((struct mthca_next_seg *) prev_wqe)->nda_op =\r
+                       cl_hton32((ind << qp->rq.wqe_shift) | 1);\r
+               wmb();\r
+               ((struct mthca_next_seg *) prev_wqe)->ee_nds =\r
+                       cl_hton32(MTHCA_NEXT_DBD | size);\r
+\r
+               if (!size0)\r
+                       size0 = size;\r
+\r
+               ++ind;\r
+               if (unlikely(ind >= qp->rq.max))\r
+                       ind -= qp->rq.max;\r
+       }\r
+\r
+out:\r
+       if (likely(nreq)) {\r
+               doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);\r
+               doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));\r
+\r
+               /*\r
+                * Make sure that descriptors are written before\r
+                * doorbell is rung.\r
+                */\r
+               mb();\r
+\r
+               mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);\r
+       }\r
+\r
+       qp->rq.next_ind = ind;\r
+       qp->rq.head    += nreq;\r
+\r
+err_busy:\r
+       cl_spinlock_release(&qp->rq.lock);\r
+       UVP_EXIT(UVP_DBG_QP);\r
+       return ret;\r
+}\r
+\r
+int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,\r
+                         struct _ib_send_wr **bad_wr)\r
+{\r
+       struct mthca_qp *qp = to_mqp(ibqp);\r
+       uint32_t doorbell[2];\r
+       uint8_t *wqe;\r
+       uint8_t *prev_wqe;\r
+       int ret = 0;\r
+       int nreq;       \r
+       int i;\r
+       int size;\r
+       int size0 = 0;\r
+       uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;\r
+       int ind;\r
+       uint8_t op0 = 0;\r
+       enum ib_wr_opcode opcode;\r
+       \r
+       UVP_ENTER(UVP_DBG_QP);\r
+       \r
+       cl_spinlock_acquire(&qp->sq.lock);\r
+\r
+       /* XXX check that state is OK to post send */\r
+\r
+       ind = qp->sq.head & (qp->sq.max - 1);\r
+       if(ibqp->state == IBV_QPS_RESET) {\r
+               ret = -EBUSY;\r
+               if (bad_wr)\r
+                       *bad_wr = wr;\r
+               goto err_busy;\r
+       }\r
+\r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+               if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {\r
+                       nreq = 0;\r
+\r
+                       doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |\r
+                                           ((qp->sq.head & 0xffff) << 8) | f0 | op0);\r
+                       doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);\r
+                       qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;\r
+                       size0 = 0;\r
+                       f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;\r
+\r
+                       /*\r
+                        * Make sure that descriptors are written before\r
+                        * doorbell record.\r
+                        */\r
+                       wmb();\r
+                       *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);\r
+\r
+                       /*\r
+                        * Make sure doorbell record is written before we\r
+                        * write MMIO send doorbell.\r
+                        */\r
+                       wmb();\r
+                       mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);\r
+\r
+               }\r
+\r
+               if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"\r
+                                       " %d max, %d nreq)\n", ibqp->qp_num,\r
+                                       qp->sq.head, qp->sq.tail,\r
+                                       qp->sq.max, nreq));                     \r
+                       ret = -ENOMEM;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               wqe = get_send_wqe(qp, ind);\r
+               prev_wqe = qp->sq.last;\r
+               qp->sq.last = wqe;\r
+               opcode = conv_ibal_wr_opcode(wr);\r
+\r
+               ((struct mthca_next_seg *) wqe)->flags =\r
+                       ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?\r
+                        cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |\r
+                       ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?\r
+                        cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |\r
+                       cl_hton32(1);\r
+               if (opcode == MTHCA_OPCODE_SEND_IMM||\r
+                       opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)\r
+                       ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;\r
+\r
+               wqe += sizeof (struct mthca_next_seg);\r
+               size = sizeof (struct mthca_next_seg) / 16;\r
+\r
+               switch (ibqp->qp_type) {\r
+               case IB_QPT_RELIABLE_CONN:\r
+                       switch (opcode) {\r
+                       case MTHCA_OPCODE_ATOMIC_CS:\r
+                       case MTHCA_OPCODE_ATOMIC_FA:\r
+                               ((struct mthca_raddr_seg *) wqe)->raddr =\r
+                                       cl_hton64(wr->remote_ops.vaddr);\r
+                               ((struct mthca_raddr_seg *) wqe)->rkey =\r
+                                       wr->remote_ops.rkey;\r
+                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
+\r
+                               wqe += sizeof (struct mthca_raddr_seg);\r
+\r
+                               if (opcode == MTHCA_OPCODE_ATOMIC_CS) {\r
+                                       ((struct mthca_atomic_seg *) wqe)->swap_add =\r
+                                               cl_hton64(wr->remote_ops.atomic2);\r
+                                       ((struct mthca_atomic_seg *) wqe)->compare =\r
+                                               cl_hton64(wr->remote_ops.atomic1);\r
+                               } else {\r
+                                       ((struct mthca_atomic_seg *) wqe)->swap_add =\r
+                                               cl_hton64(wr->remote_ops.atomic1);\r
+                                       ((struct mthca_atomic_seg *) wqe)->compare = 0;\r
+                               }\r
+\r
+                               wqe += sizeof (struct mthca_atomic_seg);\r
+                               size += (sizeof (struct mthca_raddr_seg) +\r
+                                        sizeof (struct mthca_atomic_seg)) / 16;\r
+                               break;\r
+\r
+                       case MTHCA_OPCODE_RDMA_READ:\r
+                       case MTHCA_OPCODE_RDMA_WRITE:\r
+                       case MTHCA_OPCODE_RDMA_WRITE_IMM:\r
+                               ((struct mthca_raddr_seg *) wqe)->raddr =\r
+                                       cl_hton64(wr->remote_ops.vaddr);\r
+                               ((struct mthca_raddr_seg *) wqe)->rkey =\r
+                                       wr->remote_ops.rkey;\r
+                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
+                               wqe += sizeof (struct mthca_raddr_seg);\r
+                               size += sizeof (struct mthca_raddr_seg) / 16;\r
+                               break;\r
+\r
+                       default:\r
+                               /* No extra segments required for sends */\r
+                               break;\r
+                       }\r
+\r
+                       break;\r
+\r
+               case IB_QPT_UNRELIABLE_CONN:\r
+                       switch (opcode) {\r
+                       case MTHCA_OPCODE_RDMA_WRITE:\r
+                       case MTHCA_OPCODE_RDMA_WRITE_IMM:\r
+                               ((struct mthca_raddr_seg *) wqe)->raddr =\r
+                                       cl_hton64(wr->remote_ops.vaddr);\r
+                               ((struct mthca_raddr_seg *) wqe)->rkey =\r
+                                       wr->remote_ops.rkey;\r
+                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
+                               wqe += sizeof (struct mthca_raddr_seg);\r
+                               size += sizeof (struct mthca_raddr_seg) / 16;\r
+                               break;\r
+\r
+                       default:\r
+                               /* No extra segments required for sends */\r
+                               break;\r
+                       }\r
+\r
+                       break;\r
+\r
+               case IB_QPT_UNRELIABLE_DGRM:\r
+                       {\r
+                               struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);\r
+                               memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,\r
+                                      ah->av, sizeof ( struct mthca_av));\r
+                               ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;\r
+                               ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;\r
+\r
+                               wqe += sizeof (struct mthca_arbel_ud_seg);\r
+                               size += sizeof (struct mthca_arbel_ud_seg) / 16;\r
+                               break;\r
+                       }\r
+\r
+               default:\r
+                       break;\r
+               }\r
+\r
+               if ((int)wr->num_ds > qp->sq.max_gs) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x too many gathers\n", ibqp->qp_num));\r
+                       ret = -ERANGE;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
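+               /* Inline sends copy the payload straight into the WQE (bounded by max_inline_data); otherwise build one data segment per SGE. */\r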
+               if (wr->send_opt & IB_SEND_OPT_INLINE) {\r
+                       if (wr->num_ds) {\r
+                               struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;\r
+                               uint32_t s = 0;\r
+\r
+                               wqe += sizeof *seg;\r
+                               for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                                       struct _ib_local_ds *sge = &wr->ds_array[i];\r
+\r
+                                       s += sge->length;\r
+\r
+                                       if (s > (uint32_t)qp->max_inline_data) {\r
+                                               ret = -E2BIG;\r
+                                               if (bad_wr)\r
+                                                       *bad_wr = wr;\r
+                                               goto out;\r
+                                       }\r
+\r
+                                       memcpy(wqe, (void *) (uintptr_t) sge->vaddr,\r
+                                              sge->length);\r
+                                       wqe += sge->length;\r
+                               }\r
+\r
+                               seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);\r
+                               size += align(s + sizeof *seg, 16) / 16;\r
+                       }\r
+               } else {\r
+\r
+                       for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                               ((struct mthca_data_seg *) wqe)->byte_count =\r
+                                       cl_hton32(wr->ds_array[i].length);\r
+                               ((struct mthca_data_seg *) wqe)->lkey =\r
+                                       cl_hton32(wr->ds_array[i].lkey);\r
+                               ((struct mthca_data_seg *) wqe)->addr =\r
+                                       cl_hton64(wr->ds_array[i].vaddr);\r
+                               wqe += sizeof (struct mthca_data_seg);\r
+                               size += sizeof (struct mthca_data_seg) / 16;\r
+                       }\r
+//TODO do this also in kernel\r
+//                     size += wr->num_ds * (sizeof *seg / 16);\r
+               }\r
+\r
+               qp->wrid[ind + qp->rq.max] = wr->wr_id;\r
+\r
+               if (opcode == MTHCA_OPCODE_INVALID) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x opcode invalid\n", ibqp->qp_num));\r
+                       ret = -EINVAL;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
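+               /* Link the previous WQE to this one: nda_op carries the next WQE offset and opcode, ee_nds the DBD bit and size. */\r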
+               ((struct mthca_next_seg *) prev_wqe)->nda_op =\r
+                       cl_hton32(((ind << qp->sq.wqe_shift) +\r
+                              qp->send_wqe_offset) |\r
+                             opcode);\r
+               wmb();\r
+               ((struct mthca_next_seg *) prev_wqe)->ee_nds =\r
+                       cl_hton32(MTHCA_NEXT_DBD | size |\r
+                         ((wr->send_opt & IB_SEND_OPT_FENCE) ?\r
+                                                  MTHCA_NEXT_FENCE : 0));\r
+\r
+               if (!size0) {\r
+                       size0 = size;\r
+                       op0   = opcode;\r
+               }\r
+\r
+               ++ind;\r
+               if (unlikely(ind >= qp->sq.max))\r
+                       ind -= qp->sq.max;\r
+       }\r
+\r
+out:\r
+       if (likely(nreq)) {\r
+               doorbell[0] = cl_hton32((nreq << 24) |\r
+                                   ((qp->sq.head & 0xffff) << 8) | f0 | op0);\r
+               doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);\r
+\r
+               qp->sq.head += nreq;\r
+\r
+               /*\r
+                * Make sure that descriptors are written before\r
+                * doorbell record.\r
+                */\r
+               wmb();\r
+               *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);\r
+\r
+               /*\r
+                * Make sure doorbell record is written before we\r
+                * write MMIO send doorbell.\r
+                */\r
+               wmb();\r
+               mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);\r
+       }\r
+\r
+err_busy:\r
+       cl_spinlock_release(&qp->sq.lock);\r
+\r
+       UVP_EXIT(UVP_DBG_QP);\r
+       \r
+       return ret;\r
+}\r
+\r
+int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,\r
+                         struct _ib_recv_wr **bad_wr)\r
+{\r
+       struct mthca_qp *qp = to_mqp(ibqp);\r
+       int ret = 0;\r
+       int nreq;\r
+       int ind;\r
+       int i;\r
+       uint8_t *wqe;\r
+       \r
+       UVP_ENTER(UVP_DBG_QP);\r
+       \r
+       cl_spinlock_acquire(&qp->rq.lock);\r
+\r
+       /* XXX check that state is OK to post receive */\r
+\r
+       ind = qp->rq.head & (qp->rq.max - 1);\r
+       if(ibqp->state == IBV_QPS_RESET) {\r
+               ret = -EBUSY;\r
+               if (bad_wr)\r
+                       *bad_wr = wr;\r
+               goto err_busy;\r
+       }\r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+               if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"\r
+                                       " %d max, %d nreq)\n", ibqp->qp_num,\r
+                                       qp->rq.head, qp->rq.tail,\r
+                                       qp->rq.max, nreq));\r
+                       ret = -ENOMEM;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               wqe = get_recv_wqe(qp, ind);\r
+\r
+               ((struct mthca_next_seg *) wqe)->flags = 0;\r
+\r
+               wqe += sizeof (struct mthca_next_seg);\r
+\r
+               if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {\r
+                       UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x too many scatter entries\n", ibqp->qp_num));\r
+                       ret = -ERANGE;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                       ((struct mthca_data_seg *) wqe)->byte_count =\r
+                               cl_hton32(wr->ds_array[i].length);\r
+                       ((struct mthca_data_seg *) wqe)->lkey =\r
+                               cl_hton32(wr->ds_array[i].lkey);\r
+                       ((struct mthca_data_seg *) wqe)->addr =\r
+                               cl_hton64(wr->ds_array[i].vaddr);\r
+                       wqe += sizeof (struct mthca_data_seg);\r
+               }\r
+\r
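+               /* If fewer than max_gs segments were posted, terminate the scatter list with an invalid-lkey sentinel. */\r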
+               if (i < qp->rq.max_gs) {\r
+                       ((struct mthca_data_seg *) wqe)->byte_count = 0;\r
+                       ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);\r
+                       ((struct mthca_data_seg *) wqe)->addr = 0;\r
+               }\r
+\r
+               qp->wrid[ind] = wr->wr_id;\r
+\r
+               ++ind;\r
+               if (unlikely(ind >= qp->rq.max))\r
+                       ind -= qp->rq.max;\r
+       }\r
+out:\r
+       if (likely(nreq)) {\r
+               qp->rq.head += nreq;\r
+\r
+               /*\r
+                * Make sure that descriptors are written before\r
+                * doorbell record.\r
+                */\r
+               mb();\r
+               *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);\r
+       }\r
+\r
+err_busy:\r
+       cl_spinlock_release(&qp->rq.lock);\r
+       \r
+       UVP_EXIT(UVP_DBG_QP);\r
+       \r
+       return ret;\r
+}\r
+\r
+int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,\r
+                      ib_qp_type_t type, struct mthca_qp *qp)\r
+{\r
+       int size;\r
+       int max_sq_sge;\r
+\r
+       qp->rq.max_gs    = cap->max_recv_sge;\r
+       qp->sq.max_gs    = cap->max_send_sge;\r
+       max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),\r
+                                sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);\r
+       if (max_sq_sge < (int)cap->max_send_sge)\r
+               max_sq_sge = cap->max_send_sge;\r
+\r
+       qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));\r
+       if (!qp->wrid)\r
+               return -1;\r
+\r
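+       /* RQ WQE stride: a next segment plus max_gs scatter entries, rounded up to a power of two via rq.wqe_shift. */\r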
+       size = sizeof (struct mthca_next_seg) +\r
+               qp->rq.max_gs * sizeof (struct mthca_data_seg);\r
+\r
+       for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;\r
+            qp->rq.wqe_shift++)\r
+               ; /* nothing */\r
+\r
+       size = max_sq_sge * sizeof (struct mthca_data_seg);\r
+       switch (type) {\r
+       case IB_QPT_UNRELIABLE_DGRM:\r
+               size += mthca_is_memfree(pd->context) ?\r
+                       sizeof (struct mthca_arbel_ud_seg) :\r
+                       sizeof (struct mthca_tavor_ud_seg);\r
+               break;\r
+\r
+       case IB_QPT_UNRELIABLE_CONN:\r
+               size += sizeof (struct mthca_raddr_seg);\r
+               break;\r
+\r
+       case IB_QPT_RELIABLE_CONN:\r
+               size += sizeof (struct mthca_raddr_seg);\r
+               /*\r
+                * An atomic op will require an atomic segment, a\r
+                * remote address segment and one scatter entry.\r
+                */\r
+               if (size < (sizeof (struct mthca_atomic_seg) +\r
+                           sizeof (struct mthca_raddr_seg) +\r
+                           sizeof (struct mthca_data_seg)))\r
+                       size = (sizeof (struct mthca_atomic_seg) +\r
+                               sizeof (struct mthca_raddr_seg) +\r
+                               sizeof (struct mthca_data_seg));\r
+               break;\r
+\r
+       default:\r
+               break;\r
+       }\r
+\r
+       /* Make sure that we have enough space for a bind request */\r
+       if (size < sizeof (struct mthca_bind_seg))\r
+               size = sizeof (struct mthca_bind_seg);\r
+\r
+       size += sizeof (struct mthca_next_seg);\r
+\r
+       for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;\r
+               qp->sq.wqe_shift++)\r
+               ; /* nothing */\r
+\r
+       qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,\r
+                                   1 << qp->sq.wqe_shift);\r
+\r
+       qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);\r
+\r
+       if (posix_memalign(&qp->buf, g_page_size,\r
+               align(qp->buf_size, g_page_size))) {\r
+               cl_free(qp->wrid);\r
+               return -1;\r
+       }\r
+\r
+       memset(qp->buf, 0, qp->buf_size);\r
+\r
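+       /* Mem-free (Arbel) HCAs expect pre-linked WQEs: chain each RQ/SQ entry to the next and mark unused scatter entries with the invalid lkey. */\r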
+       if (mthca_is_memfree(pd->context)) {\r
+               struct mthca_next_seg *next;\r
+               struct mthca_data_seg *scatter;\r
+               int i;\r
+               uint32_t sz;\r
+\r
+               sz = cl_hton32((sizeof (struct mthca_next_seg) +\r
+                           qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);\r
+\r
+               for (i = 0; i < qp->rq.max; ++i) {\r
+                       next = get_recv_wqe(qp, i);\r
+                       next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<\r
+                                            qp->rq.wqe_shift);\r
+                       next->ee_nds = sz;\r
+\r
+                       for (scatter = (void *) (next + 1);\r
+                            (void *) scatter < (void *) ((char *)next + (uint32_t)(1 << qp->rq.wqe_shift));\r
+                            ++scatter)\r
+                               scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);\r
+               }\r
+\r
+               for (i = 0; i < qp->sq.max; ++i) {\r
+                       next = get_send_wqe(qp, i);\r
+                       next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<\r
+                                             qp->sq.wqe_shift) +\r
+                                            qp->send_wqe_offset);\r
+               }\r
+       }\r
+\r
+       qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);\r
+       qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);\r
+\r
+       return 0;\r
+}\r
+\r
+struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)\r
+{\r
+       int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;\r
+\r
+       if (ctx->qp_table[tind].refcnt)\r
+               return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];\r
+       else\r
+               return NULL;\r
+}\r
+\r
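+/* The QP table is a two-level lookup keyed by QPN; updates are serialized by qp_table_mutex while mthca_find_qp reads without locking. */\r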
+int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)\r
+{\r
+       int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;\r
+       int ret = 0;\r
+\r
+       WaitForSingleObject( ctx->qp_table_mutex, INFINITE );\r
+\r
+       if (!ctx->qp_table[tind].refcnt) {\r
+               ctx->qp_table[tind].table = cl_malloc(\r
+                       (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));\r
+               if (!ctx->qp_table[tind].table) {\r
+                       ret = -1;\r
+                       goto out;\r
+               }\r
+       }\r
+       ++ctx->qp_table[tind].refcnt;\r
+       ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;\r
+\r
+out:\r
+       ReleaseMutex( ctx->qp_table_mutex );\r
+       return ret;\r
+}\r
+\r
+void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)\r
+{\r
+       int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;\r
+\r
+       WaitForSingleObject( ctx->qp_table_mutex, INFINITE );\r
+\r
+       if (!--ctx->qp_table[tind].refcnt)\r
+               cl_free(ctx->qp_table[tind].table);\r
+       else\r
+               ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;\r
+       \r
+       ReleaseMutex( ctx->qp_table_mutex );\r
+}\r
+\r
+int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,\r
+                      int index, int *dbd, uint32_t *new_wqe)\r
+{\r
+       struct mthca_next_seg *next;\r
+\r
+       /*\r
+        * For SRQs, all WQEs generate a CQE, so we're always at the\r
+        * end of the doorbell chain.\r
+        */\r
+       if (qp->ibv_qp.srq) {\r
+               *new_wqe = 0;\r
+               return 0;\r
+       }\r
+\r
+       if (is_send)\r
+               next = get_send_wqe(qp, index);\r
+       else\r
+               next = get_recv_wqe(qp, index);\r
+\r
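+       /* Report whether this WQE had the DBD bit set and, if it links onward, the address and size of the next WQE. */\r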
+       *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));\r
+       if (next->ee_nds & cl_hton32(0x3f))\r
+               *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |\r
+                       (next->ee_nds & cl_hton32(0x3f));\r
+       else\r
+               *new_wqe = 0;\r
+\r
+       return 0;\r
+}\r
+\r
index 608a866..bef60aa 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -61,6 +62,11 @@ extern "C"
  */\r
 #pragma warning( disable:4232 )\r
 \r
+/*\r
+ * Enable warnings about pointer sign extension.\r
+ */\r
+#pragma warning( default:4826 )\r
+\r
 /* For DECLSPEC_EXPORT and DECLSPEC_IMPORT */\r
 #include <ntdef.h>\r
 \r
index 16e9104..7a6407b 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
        #include <windows.h>\r
 #endif // !defined( _WINDOWS_ )\r
 \r
+/*\r
+ * Enable warnings about pointer sign extension.\r
+ */\r
+#pragma warning( default:4826 )\r
+\r
 #if defined( _DEBUG ) || DBG\r
        #define _DEBUG_\r
 #else\r
index 7be9502..4da5a3f 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2007 QLogic Corporation.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -979,7 +980,7 @@ __vnic_pnp_cb(
        vnic_adapter_t                  *p_primary_adapter;     \r
 #endif\r
 \r
-       vnic_adapter_t * __ptr64 p_adapter = (vnic_adapter_t * __ptr64)p_pnp_rec->pnp_context;\r
+       vnic_adapter_t * p_adapter = (vnic_adapter_t *)p_pnp_rec->pnp_context;\r
        \r
        VNIC_ENTER( VNIC_DBG_PNP );\r
 \r
index c8967a8..5d7bce8 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2007 QLogic Corporation.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -186,12 +187,12 @@ control_init(
        pIo->wrq.p_next                 = NULL;\r
        pIo->wrq.wr_type                = WR_SEND;\r
        pIo->wrq.send_opt               = IB_SEND_OPT_SIGNALED;\r
-       pIo->wrq.wr_id                  = (uint64_t)(pIo);\r
+       pIo->wrq.wr_id                  = (ULONG_PTR)pIo;\r
        pIo->wrq.num_ds                 = 1;\r
        pIo->wrq.ds_array               = &pControl->sendIo.dsList;\r
        pIo->wrq.ds_array[0].length     = sizeof(Inic_ControlPacket_t);\r
        pIo->wrq.ds_array[0].lkey  = pControl->region.lkey;\r
-       pIo->wrq.ds_array[0].vaddr = (uint64_t)(pkt++);\r
+       pIo->wrq.ds_array[0].vaddr = (ULONG_PTR)pkt++;\r
 \r
        for (i = 0; i < pConfig->numRecvs; i++ )\r
        {\r
@@ -199,12 +200,12 @@ control_init(
                pIo->pViport                    = pViport;\r
                pIo->pRoutine                   = control_recvComplete;\r
 \r
-               pIo->r_wrq.wr_id                                = (uint64_t)(pIo);\r
+               pIo->r_wrq.wr_id                                = (ULONG_PTR)pIo;\r
                pIo->r_wrq.p_next                               = NULL;\r
                pIo->r_wrq.num_ds                               = 1;\r
                pIo->r_wrq.ds_array                             = &pControl->pRecvIos[i].dsList;\r
                pIo->r_wrq.ds_array[0].length   = sizeof(Inic_ControlPacket_t);\r
-               pIo->r_wrq.ds_array[0].vaddr    = (uint64_t)(pkt++);\r
+               pIo->r_wrq.ds_array[0].vaddr    = (ULONG_PTR)pkt++;\r
                pIo->r_wrq.ds_array[0].lkey             = pControl->region.lkey;\r
        \r
                if ( ibqp_postRecv( &pControl->qp, pIo ) != IB_SUCCESS )\r
index 14733d4..09f1ca5 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2007 QLogic Corporation.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -88,8 +89,6 @@ static void
 _data_kickTimer_stop(\r
                  IN    Data_t          *pData );\r
 \r
-#define LOCAL_IO(x) PTR64((x))\r
-\r
 #define INBOUND_COPY\r
 \r
 #ifdef VNIC_STATISTIC\r
@@ -254,7 +253,7 @@ data_connect(
        pRdmaIo->io.pRoutine         = NULL;\r
        pRdmaIo->io.wrq.p_next       = NULL;\r
        pRdmaIo->io.wrq.wr_type      = WR_RDMA_WRITE;\r
-       pRdmaIo->io.wrq.wr_id        = PTR64( pRdmaIo );\r
+       pRdmaIo->io.wrq.wr_id        = (ULONG_PTR)pRdmaIo;\r
        pRdmaIo->io.wrq.num_ds       = 1;\r
        pRdmaIo->io.wrq.ds_array     = pRdmaIo->dsList;\r
        pRdmaIo->dsList[0].lkey      = pData->region.lkey;\r
@@ -265,26 +264,26 @@ data_connect(
        pSendIo->io.pRoutine         = NULL;\r
        pSendIo->io.wrq.p_next       = NULL;\r
        pSendIo->io.wrq.wr_type      = WR_SEND;\r
-       pSendIo->io.wrq.wr_id        = PTR64( pSendIo );\r
+       pSendIo->io.wrq.wr_id        = (ULONG_PTR)pSendIo;\r
        pSendIo->io.wrq.num_ds       = 1;\r
        pSendIo->io.wrq.ds_array     = &pSendIo->dsList;\r
 \r
        pSendIo->io.wrq.send_opt     = IB_SEND_OPT_SIGNALED;\r
 \r
        pSendIo->dsList.length       = 0;\r
-       pSendIo->dsList.vaddr        = PTR64( pRegionData );\r
+       pSendIo->dsList.vaddr        = (ULONG_PTR)pRegionData;\r
        pSendIo->dsList.lkey         = pData->region.lkey;\r
 \r
        for ( i = 0; i < pData->p_conf->numRecvs; i++ )\r
        {\r
                pRecvIo[i].io.pViport         = pData->p_viport;\r
                pRecvIo[i].io.pRoutine        = _data_receivedKick;\r
-               pRecvIo[i].io.r_wrq.wr_id     = PTR64( &pRecvIo[i].io );\r
+               pRecvIo[i].io.r_wrq.wr_id     = (ULONG_PTR)&pRecvIo[i].io;\r
                pRecvIo[i].io.r_wrq.p_next    = NULL;\r
                pRecvIo[i].io.r_wrq.num_ds    = 1;\r
                pRecvIo[i].io.r_wrq.ds_array  = &pRecvIo[i].dsList;\r
                pRecvIo[i].dsList.length      = 4;\r
-               pRecvIo[i].dsList.vaddr       = PTR64( pRegionData );\r
+               pRecvIo[i].dsList.vaddr       = (ULONG_PTR)pRegionData;\r
                pRecvIo[i].dsList.lkey        = pData->region.lkey;\r
                \r
                InitializeListHead( &pRecvIo[i].io.listPtrs );\r
@@ -370,7 +369,7 @@ data_connect(
                pRdmaIo->io.pRoutine         = _data_xmitComplete;\r
                pRdmaIo->io.wrq.p_next       = NULL;\r
                pRdmaIo->io.wrq.wr_type      = WR_RDMA_WRITE;\r
-               pRdmaIo->io.wrq.wr_id        = PTR64(pRdmaIo);\r
+               pRdmaIo->io.wrq.wr_id        = (ULONG_PTR)pRdmaIo;\r
                pRdmaIo->io.wrq.num_ds       = MAX_NUM_SGE; // will set actual number when transmit\r
                pRdmaIo->io.wrq.ds_array     = pRdmaIo->dsList;\r
                pRdmaIo->p_trailer                      =  (ViportTrailer_t *)&pRdmaIo->data[0];\r
@@ -381,7 +380,7 @@ data_connect(
        }\r
 \r
        pXmitPool->rdmaRKey      = pData->region.rkey;\r
-       pXmitPool->rdmaAddr      = PTR64( pXmitPool->bufPool );\r
+       pXmitPool->rdmaAddr      = (ULONG_PTR)pXmitPool->bufPool;\r
 \r
        data_postRecvs( pData );\r
 \r
@@ -1184,7 +1183,7 @@ _data_addFreeBuffer(
        pBpe = &p_recvPool->bufPool[index];\r
 \r
        pBpe->rKey       = pRdmaDest->region.rkey;\r
-       pBpe->remoteAddr = hton64( PTR64( pRdmaDest->data ) );\r
+       pBpe->remoteAddr = hton64( (ULONG_PTR)pRdmaDest->data );\r
        pBpe->valid      = (uint32_t)(pRdmaDest - &p_recvPool->pRecvBufs[0]) + 1;\r
        ++p_recvPool->numFreeBufs;\r
 \r
@@ -1338,7 +1337,7 @@ _data_sendFreeRecvBuffers(
                rdmaAddr = p_recvPool->eiocRdmaAddr + offset;\r
        \r
                pWrq->ds_array->length  = sz;\r
-               pWrq->ds_array->vaddr = PTR64((uint8_t *)p_recvPool->bufPool + offset);\r
+               pWrq->ds_array->vaddr = (ULONG_PTR)((uint8_t *)p_recvPool->bufPool + offset);\r
                pWrq->remote_ops.vaddr = rdmaAddr;\r
 \r
                if ( ibqp_postSend( &pData->qp, &pData->freeBufsIo.io ) != IB_SUCCESS )\r
index 8d5a68b..b902e78 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2007 QLogic Corporation.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -144,7 +145,7 @@ ibregion_init (
        IN              viport_t                *p_viport,\r
        IN      OUT     IbRegion_t              *pRegion,\r
        IN              ib_pd_handle_t  hPd,\r
-       IN              void* __ptr64   vaddr,\r
+       IN              void*                   vaddr,\r
        IN              uint64_t                len,\r
        IN              ib_access_t             access_ctrl )\r
 {\r
@@ -172,7 +173,7 @@ ibregion_init (
        else\r
        {\r
                pRegion->len = len;\r
-               pRegion->virtAddress = (uint64_t)( vaddr );\r
+               pRegion->virtAddress = (ULONG_PTR)vaddr;\r
        }\r
        VNIC_EXIT ( VNIC_DBG_IB );\r
        return ib_status;\r
@@ -418,7 +419,7 @@ static void
 _ibqp_detach_cb(\r
                IN              ib_cm_drep_rec_t        *p_drep_rec )\r
 {\r
-       IbQp_t  *pQp = (IbQp_t * __ptr64 )p_drep_rec->qp_context;\r
+       IbQp_t  *pQp = (IbQp_t *)p_drep_rec->qp_context;\r
        VNIC_ENTER( VNIC_DBG_IB );\r
        CL_ASSERT( p_drep_rec );\r
 \r
@@ -432,7 +433,7 @@ static void
 _ibqp_rej_cb(\r
         IN             ib_cm_rej_rec_t         *p_rej_rec )\r
 {\r
-       IbQp_t  *pQp = (IbQp_t * __ptr64 )p_rej_rec->qp_context;\r
+       IbQp_t  *pQp = (IbQp_t *)p_rej_rec->qp_context;\r
        CL_ASSERT(p_rej_rec );\r
 \r
        InterlockedExchange( &pQp->qpState, IB_DETACHED );\r
@@ -471,7 +472,7 @@ _ibqp_dreq_cb(
 {\r
        ib_api_status_t ib_status = IB_SUCCESS;\r
        ib_cm_drep_t    cm_drep;\r
-       IbQp_t  *pQp    = (IbQp_t * __ptr64 )p_dreq_rec->qp_context;\r
+       IbQp_t  *pQp    = (IbQp_t *)p_dreq_rec->qp_context;\r
 \r
        VNIC_ENTER( VNIC_DBG_IB );\r
        CL_ASSERT( p_dreq_rec );\r
@@ -699,7 +700,7 @@ _ibqp_connect_cb(
 \r
        VNIC_ENTER( VNIC_DBG_IB );\r
 \r
-       pQp = (IbQp_t * __ptr64 )p_cm_rep->qp_context;\r
+       pQp = (IbQp_t *)p_cm_rep->qp_context;\r
        p_viport = pQp->pViport;\r
 \r
        ASSERT( pQp->qpState == IB_ATTACHING );\r
@@ -870,7 +871,7 @@ ib_asyncEvent(
                switch ( pEventRecord->code )\r
                {\r
                        case IB_AE_PORT_DOWN:\r
-                               p_adapter = ( vnic_adapter_t * __ptr64)pEventRecord->context;\r
+                               p_adapter = (vnic_adapter_t *)pEventRecord->context;\r
 \r
                                if( p_adapter &&\r
                                        p_adapter->p_currentPath->pViport &&\r
@@ -889,7 +890,7 @@ ib_asyncEvent(
                        case IB_AE_WQ_REQ_ERROR:\r
                        case IB_AE_WQ_ACCESS_ERROR:\r
 \r
-                                       p_viport = ((IbQp_t * __ptr64 )pEventRecord->context)->pViport;\r
+                                       p_viport = ((IbQp_t *)pEventRecord->context)->pViport;\r
 \r
                                        if( p_viport && !p_viport->errored )\r
                                        {\r
index 8dde7de..b230c3b 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2007 QLogic Corporation.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -219,7 +220,7 @@ ibregion_init(
        IN              struct _viport          *p_viport,\r
        OUT             IbRegion_t                      *pRegion,\r
        IN              ib_pd_handle_t          hPd,\r
-       IN              void* __ptr64           vaddr,\r
+       IN              void*                           vaddr,\r
        IN              uint64_t                        len,\r
        IN              ib_access_t                     access_ctrl );\r
 \r
index c974b8d..4704404 100644 (file)
@@ -1,5 +1,6 @@
 /*\r
  * Copyright (c) 2007 QLogic Corporation.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -37,8 +38,6 @@
 #define MAXU32 MAXULONG\r
 #define MAXU64 ((uint64_t)(~0))\r
 \r
-#define PTR64(what) ((uint64_t)(void * __ptr64)(what))\r
-\r
 #ifndef min\r
 #define min(a,b) ((a)<(b)?(a):(b))\r
 #endif\r