[MTHCA] 1. feature: added a mechanism, preventing unloading MTHCA driver while there...
authorleonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 19 Jun 2006 10:37:21 +0000 (10:37 +0000)
committerleonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 19 Jun 2006 10:37:21 +0000 (10:37 +0000)
2. feature: (for applications in checked version) read dbg flag and level from MLNX_TRACE_LVL.
3. improvement: the polling interval for HCA command completion was decreased from ~166 msec to 0, which significantly sped up driver startup and multi-process tests.
4. improvement: improved mechanism of bitmap searching.
5. improvement: new mechanism for registration of user memory.
6. bugfix: fence bit support in post_send verbs in kernel and userspace.
7. bugfix: restore capabilities of the HCA card during its reset.
8. bugfix: handling of IBAL DEVICE_REMOVE_CANCELLED notification.
9. bugfix: prevented a second (repeated) dereferencing of the PCI BUS interface.
10. potential improvement: added a fairer algorithm for HCA command completion polling, which performs the polling with an increasing interval. The algorithm is guarded by the USE_FAIR_GO_BIT_POLLING preprocessor symbol, which is not defined for now. The parameters of the algorithm need to be tuned before it is enabled.

git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@386 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

24 files changed:
hw/mthca/kernel/SOURCES
hw/mthca/kernel/hca_driver.c
hw/mthca/kernel/hca_driver.h
hw/mthca/kernel/hca_pci.c
hw/mthca/kernel/hca_pnp.c
hw/mthca/kernel/hca_verbs.c
hw/mthca/kernel/ib_verbs.h
hw/mthca/kernel/mt_bitmap.h
hw/mthca/kernel/mt_l2w.h
hw/mthca/kernel/mt_memory.c
hw/mthca/kernel/mt_memory.h
hw/mthca/kernel/mt_uverbsmem.c [deleted file]
hw/mthca/kernel/mthca_cmd.c
hw/mthca/kernel/mthca_provider.c
hw/mthca/kernel/mthca_provider.h
hw/mthca/kernel/mthca_qp.c
hw/mthca/mt_utils.c [new file with mode: 0644]
hw/mthca/mt_utils.h [new file with mode: 0644]
hw/mthca/user/SOURCES
hw/mthca/user/mlnx_ual_main.c
hw/mthca/user/mlnx_ual_osbypass.c
hw/mthca/user/mlnx_uvp_cq.c
hw/mthca/user/mlnx_uvp_qp.c
hw/mthca/user/mt_l2w.h

index 5640dc9..4c7f63e 100644 (file)
@@ -16,6 +16,7 @@ SOURCES= \
        hca.rc                  \\r
                                        \\r
        ..\hca_utils.c          \\r
+       ..\mt_utils.c           \\r
                                        \\r
        hca_data.c              \\r
        hca_direct.c            \\r
@@ -35,7 +36,6 @@ SOURCES= \
        mt_reset_tavor.c        \\r
        mt_ud_header.c  \\r
        mt_uverbs.c             \\r
-       mt_uverbsmem.c  \\r
        mt_verbs.c              \\r
                                        \\r
        mthca_allocator.c       \\r
@@ -75,7 +75,6 @@ TARGETLIBS= \
 !IFDEF ENABLE_EVENT_TRACING\r
 \r
 C_DEFINES = $(C_DEFINES) -DEVENT_TRACING\r
-\r
 RUN_WPP= $(SOURCES) -km -ext: .c .h .C .H \\r
        -scan:hca_debug.h \\r
        -func:HCA_PRINT(LEVEL,FLAGS,(MSG,...)) \\r
index 7ac0834..5400aae 100644 (file)
@@ -34,6 +34,7 @@
  * Provides the driver entry points for the Tavor VPD.\r
  */\r
 \r
+#include <mt_utils.h>\r
 #include "hca_driver.h"\r
 #include "hca_debug.h"\r
 \r
@@ -73,6 +74,7 @@ UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ];
 char                   mlnx_uvp_lib_name[MAX_LIB_NAME] = {"mthcau"};\r
 \r
 \r
+\r
 NTSTATUS\r
 DriverEntry(\r
        IN                              PDRIVER_OBJECT                          p_driver_obj,\r
@@ -167,6 +169,9 @@ DriverEntry(
 #endif\r
        HCA_ENTER( HCA_DBG_DEV );\r
 \r
+       /* init common mechanisms */\r
+       fill_bit_tbls();\r
+\r
        status = __read_registry( p_registry_path );\r
        if( !NT_SUCCESS( status ) )\r
        {\r
index 153c36c..9d65aa0 100644 (file)
@@ -65,6 +65,7 @@ typedef enum _hca_reg_state
        HCA_SHUTDOWN,\r
        HCA_ADDED,\r
        HCA_STARTED,\r
+       HCA_IFC_DEREFERENCED,\r
        HCA_REGISTERED\r
 \r
 }      hca_reg_state_t;\r
@@ -80,6 +81,9 @@ typedef enum _hca_reg_state
 *      HCA_STARTED\r
 *              IRP_MN_START_DEVICE was called.  The HCA is fully functional.\r
 *\r
+*      HCA_IFC_DEREFERENCED\r
+*              DEVICE_QUERY_REMOVE for IBBUS was received.\r
+*\r
 *      HCA_REGISTERED\r
 *              Fully functional and registered with the bus root.\r
 *********/\r
@@ -129,11 +133,15 @@ typedef struct _hca_dev_ext
        hca_reg_state_t                                 state;                          /* State for tracking registration with AL */\r
        DEVICE_OBJECT                           *       p_al_dev;               /* IB_AL FDO */\r
        FILE_OBJECT                                     *       p_al_file_obj;  /* IB_AL file object */\r
+       UNICODE_STRING                                  al_sym_name;    /* IB_AL symbolic name */\r
 \r
        /* -------------------------------------------------\r
        *               LOW LEVEL DRIVER' DATA   \r
        * ------------------------------------------------ */\r
        mlnx_hca_t                                                      hca;\r
+       atomic_t                                                                usecnt; /* the number of working applications*/\r
+       cl_spinlock_t                                                   uctx_lock;                      // spinlock for the below chain\r
+       cl_qlist_t                                                              uctx_list;                      // chain of user contexts\r
 \r
        /* -------------------------------------------------\r
        *               OS DATA          \r
index 2dd804d..beb5d37 100644 (file)
@@ -95,7 +95,8 @@ __save_pci_config(
 static NTSTATUS\r
 __restore_pci_config(\r
        IN                              BUS_INTERFACE_STANDARD          *pBusIfc,\r
-       IN                              PCI_COMMON_CONFIG* const        pConfig );\r
+       IN                              PCI_COMMON_CONFIG* const        pConfig,\r
+       IN                              const int                                               is_bridge );\r
 \r
 \r
 #ifdef ALLOC_PRAGMA\r
@@ -105,6 +106,59 @@ __restore_pci_config(
 #pragma alloc_text (PAGE, __restore_pci_config)\r
 #endif\r
 \r
+/*\r
+ * Returns the offset in configuration space of the PCI-X capabilites.\r
+ */\r
+static ULONG\r
+__FindCapability(\r
+       IN                              PCI_COMMON_CONFIG* const        pConfig,  \r
+       IN                              char cap_id\r
+       )\r
+{\r
+       ULONG                                           offset = 0;\r
+       PCI_CAPABILITIES_HEADER         *pHdr = NULL;\r
+       UCHAR                                           *pBuf = (UCHAR*)pConfig;\r
+\r
+       HCA_ENTER( HCA_DBG_PNP );\r
+\r
+       if  ( pConfig->HeaderType == PCI_DEVICE_TYPE ) {\r
+               if( pConfig->u.type0.CapabilitiesPtr )\r
+               {\r
+                       pHdr = (PCI_CAPABILITIES_HEADER*)\r
+                               (pBuf + pConfig->u.type0.CapabilitiesPtr);\r
+               }\r
+       }\r
+\r
+       if  ( pConfig->HeaderType == PCI_BRIDGE_TYPE ) {\r
+               if( pConfig->u.type1.CapabilitiesPtr )\r
+               {\r
+                       pHdr = (PCI_CAPABILITIES_HEADER*)\r
+                               (pBuf + pConfig->u.type1.CapabilitiesPtr);\r
+               }\r
+       }\r
+\r
+       /*\r
+        * Fix up any fields that might cause changes to the\r
+        * device - like writing VPD data.\r
+        */\r
+       while( pHdr )\r
+       {\r
+               if( pHdr->CapabilityID == cap_id )\r
+               {\r
+                       offset = (UCHAR)(((ULONG_PTR)pHdr) - ((ULONG_PTR)pConfig));\r
+                       break;\r
+               }\r
+\r
+               if( pHdr->Next )\r
+                       pHdr = (PCI_CAPABILITIES_HEADER*)(pBuf + pHdr->Next);\r
+               else\r
+                       pHdr = NULL;\r
+       }\r
+\r
+       HCA_EXIT( HCA_DBG_PNP );\r
+       return offset;\r
+}\r
+\r
 /* Forwards the request to the HCA's PDO. */\r
 static NTSTATUS\r
 __get_bus_ifc(\r
@@ -283,13 +337,76 @@ __fixup_pci_capabilities(
 static NTSTATUS\r
 __restore_pci_config(\r
        IN                              BUS_INTERFACE_STANDARD          *pBusIfc,\r
-       IN                              PCI_COMMON_CONFIG* const        pConfig )\r
+       IN                              PCI_COMMON_CONFIG* const        pConfig,\r
+       IN                              const int                                               is_bridge )\r
 {\r
        NTSTATUS status = STATUS_SUCCESS;\r
        int             i, *pci_hdr = (int*)pConfig;\r
+       int hca_pcix_cap = 0;\r
 \r
        HCA_ENTER( HCA_DBG_PNP );\r
 \r
+       /* get capabilities */\r
+       hca_pcix_cap = __FindCapability( pConfig, PCI_CAPABILITY_ID_PCIX );\r
+\r
+       /* restore capabilities*/\r
+       if (is_bridge) {\r
+               if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
+                       &pci_hdr[(hca_pcix_cap + 0x8) / 4], hca_pcix_cap + 0x8, 4) ) {\r
+                       HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
+                               ("Couldn't restore HCA bridge Upstream split transaction control, aborting.\n"));\r
+                       status = STATUS_UNSUCCESSFUL;\r
+                       goto out;\r
+               }\r
+               if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
+                       &pci_hdr[(hca_pcix_cap + 0xc) / 4], hca_pcix_cap + 0xc, 4) ) {\r
+                       HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
+                               ("Couldn't restore HCA bridge Downstream split transaction control, aborting.\n"));\r
+                       status = STATUS_UNSUCCESSFUL;\r
+                       goto out;\r
+               }\r
+       }\r
+       else {\r
+               int hca_pcie_cap = __FindCapability( pConfig, PCI_CAPABILITY_ID_PCIEXP );\r
+               PCI_PCIEXP_CAPABILITY   *pPciExpCap = (PCI_PCIEXP_CAPABILITY*)(((UCHAR*)pConfig) + hca_pcie_cap);\r
+\r
+               if (hca_pcix_cap) {\r
+                       if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
+                               &pci_hdr[hca_pcix_cap/4], hca_pcix_cap, 4) ) {\r
+                               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
+                                       ("Couldn't restore HCA PCI-X command register, aborting.\n"));\r
+                               status = STATUS_UNSUCCESSFUL;\r
+                               goto out;\r
+                       }\r
+               }\r
+\r
+               if (hca_pcie_cap) {\r
+                       /* restore HCA PCI Express Device Control register */\r
+                       if ( sizeof( pPciExpCap->DevControl ) != pBusIfc->SetBusData( \r
+                               pBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
+                               &pPciExpCap->DevControl,        hca_pcie_cap + \r
+                               offsetof( PCI_PCIEXP_CAPABILITY, DevControl),\r
+                               sizeof( pPciExpCap->DevControl ) )) {\r
+                               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
+                                       ("Couldn't restore HCA PCI Express Device Control register, aborting.\n"));\r
+                               status = STATUS_UNSUCCESSFUL;\r
+                               goto out;\r
+                       }\r
+                       /* restore HCA PCI Express Link Control register */\r
+                       if ( sizeof( pPciExpCap->LinkControl ) != pBusIfc->SetBusData( \r
+                               pBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
+                               &pPciExpCap->LinkControl,       hca_pcie_cap + \r
+                               offsetof( PCI_PCIEXP_CAPABILITY, LinkControl),\r
+                               sizeof( pPciExpCap->LinkControl ) )) {\r
+                               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
+                                       ("Couldn't restore HCA PCI Express Link Control register, aborting.\n"));\r
+                               status = STATUS_UNSUCCESSFUL;\r
+                               goto out;\r
+                       }\r
+               }\r
+       }\r
+\r
+       /* write basic part */\r
        for (i = 0; i < 16; ++i) {\r
                if (i == 1)\r
                        continue;\r
@@ -326,10 +443,6 @@ hca_reset( DEVICE_OBJECT* const            pDevObj, int is_tavor )
 \r
        HCA_ENTER( HCA_DBG_PNP );\r
 \r
-       /* Some Tavor cards don't ever return from reset on some platforms. */\r
-       if( is_tavor )\r
-               goto resetErr1;\r
-\r
        /* get the resources */\r
        {\r
                /* Get the HCA's bus interface. */\r
@@ -357,12 +470,20 @@ hca_reset( DEVICE_OBJECT* const           pDevObj, int is_tavor )
 \r
                /* Save the HCA bridge's configuration, if any */\r
                if (is_tavor) {\r
+                       int hca_pcix_cap;\r
                        status = __save_pci_config( &brBusIfc, &brConfig );\r
                        if( !NT_SUCCESS( status ) ) {\r
                                HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP,\r
                                        ("Failed to save bridge config.\n"));\r
                                goto resetErr3;\r
                        }\r
+                       hca_pcix_cap = __FindCapability( &brConfig, PCI_CAPABILITY_ID_PCIX );\r
+                       if (!hca_pcix_cap) {\r
+                               status = STATUS_UNSUCCESSFUL;\r
+                               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP,\r
+                                       ("Couldn't locate HCA bridge PCI-X capability, aborting.\n"));\r
+                               goto resetErr3;\r
+                       }\r
                }\r
        }\r
        \r
@@ -423,7 +544,7 @@ good:       /* restore the HCA's PCI configuration headers */
                if (is_tavor) {\r
                        /* Restore the HCA's bridge configuration. */\r
                        HCA_PRINT( TRACE_LEVEL_INFORMATION  ,HCA_DBG_PNP  ,("Restoring bridge PCI configuration \n"));\r
-                       status = __restore_pci_config( &brBusIfc, &brConfig );\r
+                       status = __restore_pci_config( &brBusIfc, &brConfig, TRUE );\r
                        if( !NT_SUCCESS( status ) ) {\r
                                HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
                                        ("Failed to restore bridge config.\n"));\r
@@ -433,7 +554,7 @@ good:       /* restore the HCA's PCI configuration headers */
                \r
                /* Restore the HCA's configuration. */\r
                HCA_PRINT( TRACE_LEVEL_INFORMATION  ,HCA_DBG_PNP  ,("Restoring HCA PCI configuration \n"));\r
-               status = __restore_pci_config( &hcaBusIfc, &hcaConfig );\r
+               status = __restore_pci_config( &hcaBusIfc, &hcaConfig, FALSE );\r
                if( !NT_SUCCESS( status ) ) {\r
                        HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
                                ("Failed to restore HCA config.\n"));\r
@@ -453,58 +574,6 @@ resetErr1:
 }\r
 \r
 \r
-/*\r
- * Returns the offset in configuration space of the PCI-X capabilites.\r
- */\r
-static ULONG\r
-__FindCapability(\r
-       IN                              PCI_COMMON_CONFIG* const        pConfig,  \r
-       IN                              char cap_id\r
-       )\r
-{\r
-       ULONG                                           offset = 0;\r
-       UCHAR                                           *pBuf;\r
-       PCI_CAPABILITIES_HEADER         *pHdr;\r
-\r
-       HCA_ENTER( HCA_DBG_PNP );\r
-\r
-       pBuf = (UCHAR*)pConfig;\r
-\r
-       ASSERT( pConfig->HeaderType == PCI_DEVICE_TYPE );\r
-\r
-       if( pConfig->u.type0.CapabilitiesPtr )\r
-       {\r
-               pHdr = (PCI_CAPABILITIES_HEADER*)\r
-                       (pBuf + pConfig->u.type0.CapabilitiesPtr);\r
-       }\r
-       else\r
-       {\r
-               pHdr = NULL;\r
-       }\r
-\r
-       /*\r
-        * Fix up any fields that might cause changes to the\r
-        * device - like writing VPD data.\r
-        */\r
-       while( pHdr )\r
-       {\r
-               if( pHdr->CapabilityID == cap_id )\r
-               {\r
-                       offset = (UCHAR)(((ULONG_PTR)pHdr) - ((ULONG_PTR)pConfig));\r
-                       break;\r
-               }\r
-\r
-               if( pHdr->Next )\r
-                       pHdr = (PCI_CAPABILITIES_HEADER*)(pBuf + pHdr->Next);\r
-               else\r
-                       pHdr = NULL;\r
-       }\r
-\r
-       HCA_EXIT( HCA_DBG_PNP );\r
-       return offset;\r
-}\r
-\r
-\r
 /*\r
  * Tunes PCI configuration as described in 13.3.2 in the Tavor PRM.\r
  */\r
@@ -636,8 +705,8 @@ hca_enable_pci(
                /* fix command register (set PCI Master bit) */\r
                // NOTE: we change here the saved value of the command register\r
                pHcaConfig->Command |= 7;\r
-          len = phcaBusIfc->SetBusData( phcaBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
-               (PVOID)&pHcaConfig->Command , 4, sizeof(ULONG) ); \r
+               len = phcaBusIfc->SetBusData( phcaBusIfc->Context, PCI_WHICHSPACE_CONFIG,\r
+                       (PVOID)&pHcaConfig->Command , 4, sizeof(ULONG) );\r
                if( len != sizeof(ULONG) )\r
                {\r
                        HCA_PRINT( TRACE_LEVEL_ERROR  ,HCA_DBG_PNP  ,("Failed to write command register.\n"));\r
@@ -649,6 +718,7 @@ hca_enable_pci(
 \r
        pciErr:\r
                phcaBusIfc->InterfaceDereference( phcaBusIfc->Context );\r
+               phcaBusIfc->InterfaceDereference = NULL;\r
        out:\r
                HCA_EXIT( HCA_DBG_PNP );\r
                return status;\r
@@ -657,9 +727,9 @@ hca_enable_pci(
 void hca_disable_pci(PBUS_INTERFACE_STANDARD   phcaBusIfc)\r
 {\r
        // no need to disable the card, so just release the PCI bus i/f\r
-       if (phcaBusIfc) {\r
+       if (phcaBusIfc->InterfaceDereference) {\r
                phcaBusIfc->InterfaceDereference( phcaBusIfc->Context );\r
-               phcaBusIfc = NULL;\r
+               phcaBusIfc->InterfaceDereference = NULL;\r
        }\r
 }\r
 \r
index 096d4c5..de52708 100644 (file)
@@ -115,6 +115,10 @@ static NTSTATUS
 __get_ci_interface(\r
        IN                              DEVICE_OBJECT* const            p_dev_obj );\r
 \r
+static void\r
+__hca_deregister(\r
+       IN                              hca_dev_ext_t                   *p_ext );\r
+\r
 static NTSTATUS\r
 __hca_register(\r
        IN                              DEVICE_OBJECT                           *p_dev_obj );\r
@@ -219,6 +223,9 @@ hca_add_device(
 \r
        p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension;\r
        cl_memclr( p_ext, sizeof(hca_dev_ext_t) );\r
+       cl_spinlock_init( &p_ext->uctx_lock );\r
+       cl_qlist_init( &p_ext->uctx_list );\r
+       atomic_set(&p_ext->usecnt, 0);\r
 \r
        /* Attach to the device stack. */\r
        pNextDevObj = IoAttachDeviceToDeviceStack( p_dev_obj, pPdo );\r
@@ -321,31 +328,30 @@ __pnp_notify_target(
 \r
        if( IsEqualGUID( &pNotify->Event, &GUID_TARGET_DEVICE_QUERY_REMOVE ) )\r
        {\r
-               if( p_ext->state == HCA_REGISTERED )\r
-               {\r
+               if ( p_ext->state == HCA_REGISTERED) {\r
                        /* Release AL's CI interface. */\r
                        p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context );\r
-                       p_ext->state = HCA_STARTED;\r
+                       p_ext->state = HCA_IFC_DEREFERENCED;\r
                }\r
 \r
                /* Release AL's file object so that it can unload. */\r
+               CL_ASSERT( p_ext->p_al_dev );\r
                CL_ASSERT( p_ext->p_al_file_obj );\r
                CL_ASSERT( p_ext->p_al_file_obj == pNotify->FileObject );\r
-               ObDereferenceObject( p_ext->p_al_file_obj );\r
-               p_ext->p_al_file_obj = NULL;\r
-               p_ext->p_al_dev = NULL;\r
+               if( p_ext->p_al_file_obj ) {\r
+                       ObDereferenceObject( p_ext->p_al_file_obj );\r
+                       p_ext->p_al_file_obj = NULL;\r
+                       p_ext->p_al_dev = NULL;\r
+               }\r
        }\r
        else if( IsEqualGUID( &pNotify->Event, \r
                &GUID_TARGET_DEVICE_REMOVE_COMPLETE ) )\r
        {\r
-               if( p_ext->state == HCA_REGISTERED )\r
-               {\r
-                       /* Release AL's CI interface. */\r
-                       p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context );\r
-                       p_ext->state = HCA_STARTED;\r
-               }\r
+               __hca_deregister( p_ext );\r
 \r
                /* Release AL's file object so that it can unload. */\r
+               CL_ASSERT( p_ext->p_al_dev );\r
+               CL_ASSERT( p_ext->p_al_file_obj );\r
                if( p_ext->p_al_file_obj )\r
                {\r
                        ObDereferenceObject( p_ext->p_al_file_obj );\r
@@ -354,22 +360,36 @@ __pnp_notify_target(
                }\r
 \r
                /* Cancel our target device change registration. */\r
-               IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry );\r
-               p_ext->pnp_target_entry = NULL;\r
+               if (p_ext->pnp_target_entry) {\r
+                       IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry );\r
+                       p_ext->pnp_target_entry = NULL;\r
+               }\r
+\r
        }\r
        else if( IsEqualGUID( &pNotify->Event, \r
                &GUID_TARGET_DEVICE_REMOVE_CANCELLED ) )\r
        {\r
                /* Cancel our target device change registration. */\r
-               IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry );\r
-               p_ext->pnp_target_entry = NULL;\r
+               if (p_ext->pnp_target_entry) {\r
+                       IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry );\r
+                       p_ext->pnp_target_entry = NULL;\r
+               }\r
 \r
                /* Get the device object pointer for the AL. */\r
                CL_ASSERT( !p_ext->p_al_file_obj );\r
                CL_ASSERT( !p_ext->p_al_dev );\r
-               p_ext->p_al_file_obj = pNotify->FileObject;\r
-               p_ext->p_al_dev = IoGetRelatedDeviceObject( p_ext->p_al_file_obj );\r
+               /* Get the AL device object. */\r
+               HCA_PRINT( TRACE_LEVEL_INFORMATION      ,HCA_DBG_SHIM  ,("Calling IoGetDeviceObjectPointer.\n"));\r
+               status = IoGetDeviceObjectPointer( &p_ext->al_sym_name,\r
+                       FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev );\r
+               if( !NT_SUCCESS( status ) )\r
+               {\r
+                       HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, \r
+                               ("IoGetDeviceObjectPointer returned %08x.\n", status ));\r
+                       return STATUS_SUCCESS;\r
+               }\r
 \r
+               /* Register for removal notification of the IB Fabric root device. */\r
                status = IoRegisterPlugPlayNotification( \r
                        EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, \r
                        p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, \r
@@ -381,7 +401,12 @@ __pnp_notify_target(
                        return status;\r
                }\r
 \r
-               __hca_register( p_dev_obj );\r
+               CL_ASSERT( p_ext->state == HCA_IFC_DEREFERENCED );\r
+               if ( p_ext->state == HCA_IFC_DEREFERENCED) {\r
+                       /* Release AL's CI interface. */\r
+                       p_ext->ci_ifc.wdm.InterfaceReference( p_ext->ci_ifc.wdm.Context );\r
+                       p_ext->state = HCA_REGISTERED;\r
+               }\r
        }\r
 \r
        HCA_EXIT( HCA_DBG_PNP );\r
@@ -420,6 +445,25 @@ __alloc_hca_ifc(
        return pIfc;\r
 }\r
 \r
+static void\r
+__hca_deregister(\r
+       IN                              hca_dev_ext_t                   *p_ext )\r
+{\r
+       HCA_ENTER( HCA_DBG_PNP );\r
+       \r
+       if ( p_ext->state == HCA_REGISTERED) {\r
+               if (p_ext->ci_ifc.deregister_ca) {\r
+                       /* Notify AL that the CA is being removed. */\r
+                       p_ext->ci_ifc.deregister_ca( p_ext->hca.guid );\r
+                       p_ext->ci_ifc.deregister_ca = NULL;\r
+                       /* Release AL's CI interface. */\r
+                       p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context );\r
+                       p_ext->state = HCA_STARTED;\r
+               }\r
+       }\r
+\r
+       HCA_EXIT( HCA_DBG_PNP );\r
+}\r
 \r
 static NTSTATUS\r
 __hca_register(\r
@@ -473,7 +517,7 @@ __pnp_notify_ifc(
        IN                              void                                            *pNotifyStruct,\r
        IN                              void                                            *context )\r
 {\r
-       NTSTATUS                                                                status;\r
+       NTSTATUS                                                                status = STATUS_SUCCESS;\r
        DEVICE_OBJECT                                                   *p_dev_obj;\r
        hca_dev_ext_t                                                   *p_ext;\r
        DEVICE_INTERFACE_CHANGE_NOTIFICATION    *pNotify;\r
@@ -485,10 +529,7 @@ __pnp_notify_ifc(
        p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension;\r
 \r
        if( !IsEqualGUID( &pNotify->Event, &GUID_DEVICE_INTERFACE_ARRIVAL ) )\r
-       {\r
-               HCA_EXIT( HCA_DBG_PNP );\r
-               return STATUS_SUCCESS;\r
-       }\r
+               goto done;\r
 \r
        /*\r
         * Sanity check.  We should only be getting notifications of the \r
@@ -499,22 +540,36 @@ __pnp_notify_ifc(
 \r
        if( p_ext->state != HCA_STARTED )\r
        {\r
-               HCA_PRINT( TRACE_LEVEL_ERROR  ,HCA_DBG_SHIM  ,("Invalid state: %d\n", p_ext->state));\r
-               return STATUS_SUCCESS;\r
+               HCA_PRINT( TRACE_LEVEL_ERROR  ,HCA_DBG_PNP  ,("Invalid state: %d\n", p_ext->state));\r
+               goto done;\r
+       }\r
+\r
+       /* save symbolic name of IBAL for a case of cancelled IBAL removal */\r
+       if (!p_ext->al_sym_name.Buffer) {\r
+               p_ext->al_sym_name.Length = pNotify->SymbolicLinkName->Length;\r
+               p_ext->al_sym_name.MaximumLength = pNotify->SymbolicLinkName->MaximumLength;\r
+               p_ext->al_sym_name.Buffer = ExAllocatePool( NonPagedPool, \r
+                       p_ext->al_sym_name.MaximumLength * sizeof(wchar_t) );\r
+               if (!p_ext->al_sym_name.Buffer)\r
+               {\r
+                       HCA_PRINT( TRACE_LEVEL_ERROR  ,HCA_DBG_PNP  ,("allocation of sym IBAL name failed.\n"));\r
+                       goto done;\r
+               }\r
+               RtlCopyUnicodeString( &p_ext->al_sym_name, pNotify->SymbolicLinkName );\r
        }\r
 \r
        ASSERT( !p_ext->p_al_dev );\r
        ASSERT( !p_ext->p_al_file_obj );\r
 \r
        /* Get the AL device object. */\r
-       HCA_PRINT( TRACE_LEVEL_INFORMATION  ,HCA_DBG_SHIM  ,("Calling IoGetDeviceObjectPointer.\n"));\r
+       HCA_PRINT( TRACE_LEVEL_INFORMATION  ,HCA_DBG_PNP  ,("Calling IoGetDeviceObjectPointer.\n"));\r
        status = IoGetDeviceObjectPointer( pNotify->SymbolicLinkName,\r
                FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev );\r
        if( !NT_SUCCESS( status ) )\r
        {\r
-               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM\r
+               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP\r
                        ("IoGetDeviceObjectPointer returned %08x.\n", status ));\r
-               return STATUS_SUCCESS;\r
+               goto done;\r
        }\r
 \r
        /* Register for removal notification of the IB Fabric root device. */\r
@@ -526,29 +581,29 @@ __pnp_notify_ifc(
                &p_ext->pnp_target_entry );\r
        if( !NT_SUCCESS( status ) )\r
        {\r
-               ObDereferenceObject( p_ext->p_al_file_obj );\r
-               p_ext->p_al_file_obj = NULL;\r
-               p_ext->p_al_dev = NULL;\r
                HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
                        ("IoRegisterPlugPlayNotification returned %08x.\n", status));\r
-               return STATUS_SUCCESS;\r
+               goto err_reg_notify;\r
        }\r
 \r
        status = __hca_register( p_dev_obj );\r
        if( !NT_SUCCESS( status ) )\r
        {\r
-               IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry );\r
-               p_ext->pnp_target_entry = NULL;\r
-               ObDereferenceObject( p_ext->p_al_file_obj );\r
-               p_ext->p_al_file_obj = NULL;\r
-               p_ext->p_al_dev = NULL;\r
-               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, \r
+               HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, \r
                        ("__get_ci_interface returned %08x.\n", status));\r
-               return STATUS_SUCCESS;\r
+               goto err_reg_hca;\r
        }\r
 \r
+err_reg_hca:\r
+       IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry );\r
+       p_ext->pnp_target_entry = NULL;\r
+err_reg_notify:\r
+       ObDereferenceObject( p_ext->p_al_file_obj );\r
+       p_ext->p_al_file_obj = NULL;\r
+       p_ext->p_al_dev = NULL;\r
+done:\r
        HCA_EXIT( HCA_DBG_PNP );\r
-       return STATUS_SUCCESS;\r
+       return status;\r
 }\r
 \r
 \r
@@ -857,14 +912,7 @@ __hca_release_resources(
        switch( p_ext->state )\r
        {\r
        case HCA_REGISTERED:\r
-               CL_ASSERT( p_ext->ci_ifc.deregister_ca );\r
-               CL_ASSERT( p_ext->p_al_dev );\r
-               CL_ASSERT( p_ext->p_al_file_obj );\r
-               /* Notify AL that the CA is being removed. */\r
-               p_ext->ci_ifc.deregister_ca( p_ext->hca.guid );\r
-               /* Release AL's CI interface. */\r
-               p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context );\r
-               p_ext->state = HCA_STARTED;\r
+               __hca_deregister( p_ext );\r
 \r
                /* Fall through. */\r
        case HCA_STARTED:\r
@@ -872,6 +920,11 @@ __hca_release_resources(
                mlnx_hca_remove( &p_ext->hca );\r
        }\r
 \r
+       if (p_ext->al_sym_name.Buffer) {\r
+               ExFreePool( p_ext->al_sym_name.Buffer );\r
+               p_ext->al_sym_name.Buffer = NULL;\r
+       }\r
+       \r
        if( p_ext->pnp_target_entry )\r
        {\r
                ASSERT( p_ext->pnp_ifc_entry );\r
@@ -1057,7 +1110,11 @@ hca_query_remove(
        IN                              IRP* const                                      p_irp, \r
                OUT                     cl_irp_action_t* const          p_action )\r
 {\r
-       /* Query remove always succeeds. */\r
+       hca_dev_ext_t*p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension;\r
+       if (atomic_read(&p_ext->usecnt)) {\r
+               p_irp->IoStatus.Status = STATUS_UNSUCCESSFUL;\r
+               return cl_irp_complete( p_dev_obj, p_irp, p_action );\r
+       }\r
        /* TODO: set a flag to fail creation of any new IB resources. */\r
        return cl_irp_skip( p_dev_obj, p_irp, p_action );\r
 }\r
@@ -1460,19 +1517,16 @@ __PowerDownCb(
        PoSetPowerState( p_dev_obj, DevicePowerState,\r
                pIoStack->Parameters.Power.State );\r
 \r
-       if( p_ext->state == HCA_REGISTERED )\r
+       switch( p_ext->state )\r
        {\r
-               CL_ASSERT( p_ext->ci_ifc.deregister_ca );\r
-               CL_ASSERT( p_ext->p_al_dev );\r
-               CL_ASSERT( p_ext->p_al_file_obj );\r
-               /* Notify AL that the CA is being removed. */\r
-               p_ext->ci_ifc.deregister_ca( p_ext->hca.guid );\r
-               /* Release AL's CI interface. */\r
-               p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context );\r
-               p_ext->state = HCA_STARTED;\r
-       }\r
+       case HCA_REGISTERED:\r
+               __hca_deregister( p_ext );\r
 \r
-       mthca_remove_one( p_ext );\r
+               /* Fall through. */\r
+       case HCA_STARTED:\r
+               /* dequeue HCA  */\r
+               mlnx_hca_remove( &p_ext->hca );\r
+       }\r
 \r
        PoStartNextPowerIrp( p_irp );\r
        IoSkipCurrentIrpStackLocation( p_irp );\r
index 1a75067..dfdcb8e 100644 (file)
@@ -449,6 +449,11 @@ done:
        p_context->va = p_context->p_mdl = NULL;\r
        p_context->fw_if_open = FALSE;\r
        KeInitializeMutex( &p_context->mutex, 0 );\r
+       // chain user context to the device\r
+       cl_spinlock_acquire( &ext_p->uctx_lock );\r
+       cl_qlist_insert_tail( &ext_p->uctx_list, &p_context->list_item );\r
+       atomic_inc(&ext_p->usecnt);\r
+       cl_spinlock_release( &ext_p->uctx_lock );\r
        \r
        // return the result\r
        if (ph_um_ca) *ph_um_ca = (ib_ca_handle_t)p_context;\r
@@ -473,12 +478,17 @@ mlnx_um_close(
        IN                              ib_ca_handle_t                          h_um_ca )\r
 {\r
        struct ib_ucontext *p_ucontext = (struct ib_ucontext *)h_um_ca;\r
-       UNREFERENCED_PARAMETER(h_ca);\r
+       mlnx_hob_t                      *hob_p = (mlnx_hob_t *)h_ca;\r
+       hca_dev_ext_t *ext_p = EXT_FROM_HOB( hob_p );\r
 \r
        if (mthca_is_livefish(to_mdev(p_ucontext->device)))\r
                goto done;\r
        unmap_crspace_for_all(p_ucontext);\r
 done:  \r
+       cl_spinlock_acquire( &ext_p->uctx_lock );\r
+       cl_qlist_remove_item( &ext_p->uctx_list, &p_ucontext->list_item );\r
+       atomic_dec(&ext_p->usecnt);\r
+       cl_spinlock_release( &ext_p->uctx_lock );\r
        if( !p_ucontext->pd )\r
                cl_free( h_um_ca );\r
        else\r
index f2e7060..a18b9eb 100644 (file)
@@ -531,6 +531,7 @@ struct ib_ucontext {
        struct ib_pd *pd;
        atomic_t                usecnt; /* count all resources */
        ULONG           is_removing;
+       cl_list_item_t list_item;                       // chain of user contexts
        // for tools support
        KMUTEX  mutex;
        PMDL    p_mdl;
index 22940ff..550528d 100644 (file)
@@ -1,10 +1,9 @@
 #ifndef MT_BITMAP_H
 #define MT_BITMAP_H
 
+#include <mt_utils.h>
+
 // DECLARE_BITMAP
-#define BITS_PER_LONG          32
-#define BITS_TO_LONGS(bits) \
-        (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
 #define DECLARE_BITMAP(name,bits) \
     unsigned long name[BITS_TO_LONGS(bits)]
 
@@ -104,157 +103,5 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
                 return __bitmap_empty(src, nbits);
 }
 
-/*
-* fls: find last bit set.
-* returns: 0 - if not found or N+1, if found Nth bit
-*/
-
-static inline int fls(int x)
-{
-       int r = 32;
-
-       if (!x)
-               return 0;
-       if (!(x & 0xffff0000u)) {
-               x <<= 16;
-               r -= 16;
-       }
-       if (!(x & 0xff000000u)) {
-               x <<= 8;
-               r -= 8;
-       }
-       if (!(x & 0xf0000000u)) {
-               x <<= 4;
-               r -= 4;
-       }
-       if (!(x & 0xc0000000u)) {
-               x <<= 2;
-               r -= 2;
-       }
-       if (!(x & 0x80000000u)) {
-               x <<= 1;
-               r -= 1;
-       }
-       return r;
-}
-
-
-/**
-* _ffs - find the first one bit in a word
-* @addr: The address to start the search at
-* @offset: The bitnumber to start searching at
-*
-* returns: 0 - if not found or N+1, if found Nth bit
-*/
-static inline int _ffs(const unsigned long *addr, int offset)
-{
-       //TODO: not an effective code - is better in Assembler
-       int mask = 1 << offset;
-       int rbc = BITS_PER_LONG - offset;
-       int ix;
-       for (ix=0; ix<rbc; ix++, mask<<=1) {
-               if (*addr & mask)
-                       return offset + ix + 1;
-       }
-       return 0;
-}
-
-#define ffs(val)       _ffs((const unsigned long *)&val,0)
-
-/**
-* _ffz - find the first zero bit in a word
-* @addr: The address to start the search at
-* @offset: The bitnumber to start searching at
-*
-* returns: 0 - if not found or N+1, if found Nth bit
-*/
-static inline int _ffz(const unsigned long *addr, int offset)
-{
-       //TODO: not an effective code - is better in Assembler
-       int mask = 1 << offset;
-       int rbc = BITS_PER_LONG - offset;
-       int ix;
-       for (ix=0; ix<rbc; ix++, mask<<=1) {
-               if (!(*addr & mask))
-                       return offset + ix + 1;
-       }
-       return 0;
-}
-
-#define ffz(val)       _ffz(&val,0)
-
-/**
-* find_next_zero_bit - find the first zero bit in a memory region
-* @addr: The address to base the search on
-* @offset: The bitnumber to start searching at
-* @bits_size: The maximum size to search
-*
-* Returns the bit-number of the first zero bit, not the number of the byte
-* containing a bit. If not found - returns 'size'
-*/
-static inline int find_next_zero_bit(const unsigned long *addr, int bits_size, int offset)
-{      
-       int len = BITS_TO_LONGS(bits_size);
-       int ix = offset % BITS_PER_LONG;
-       int w_offset = offset / BITS_PER_LONG;
-
-       MT_ASSERT(w_offset < len);
-       
-       // search in the first word
-       ix = _ffz(addr + w_offset,ix);
-       if (ix)
-               return ix - 1;
-       
-       // look in the rest
-       for (; ++w_offset < len; ) {
-               ix = _ffz(addr + w_offset,0);
-               if (ix)
-                       return (w_offset * BITS_PER_LONG) + ix - 1;
-       }
-
-       return bits_size;
-       
-}
-
-/**
-* find_first_zero_bit - find the first zero bit in a memory region
-* @addr: The address to start the search at
-* @bits_size: The maximum size to search
-*
-* Returns the bit-number of the first zero bit, not the number of the byte
-* containing a bit. If not found - returns 'bits_size'.
-*/
-static inline int find_first_zero_bit(const unsigned long *addr, unsigned bits_size)
-{
-       int len = BITS_TO_LONGS(bits_size);
-       int i, ix;
-       for (i=0; i<len; i++,addr++) {
-               ix = _ffz(addr,0);
-               if (ix)
-                       return (i * BITS_PER_LONG) + ix - 1;
-       }
-       return bits_size;
-}
-
-/**
-* find_first_bit - find the first set bit in a memory region
-* @addr: The address to start the search at
-* @bits_size: The maximum size to search (in bits)
-*
-* Returns the bit-number of the first set bit, not the number of the byte
-* containing a bit. Returns 'bits_size', if not found
-*/
-static inline int find_first_bit(const unsigned long *addr, unsigned bits_size)
-{
-       int len = BITS_TO_LONGS(bits_size);
-       int i, ix;
-       for (i=0; i<len; i++,addr++) {
-               ix = _ffs(addr,0);
-               if (ix)
-                       return (i * BITS_PER_LONG) + ix - 1;
-       }
-       return bits_size;
-}
-
 
 #endif
index 2b3a215..fbe1162 100644 (file)
@@ -23,6 +23,7 @@
 #include <mt_pcipool.h>
 //#include <mt_byteorder.h>
 #include <complib/cl_timer.h>
+#include <complib/cl_qlist.h>
 #include <hca_debug.h>
 
 
index 4c19376..c43e7f7 100644 (file)
@@ -329,14 +329,15 @@ void free_dma_mem(
 
 
 typedef struct _mt_iobuf_seg {
-  LIST_ENTRY   link;
-  PMDL   mdl_p;
-  u64 va;  /* virtual address of the buffer */
-  u64 size;     /* size in bytes of the buffer */
-  u32 nr_pages;
-  int  is_user;
+       LIST_ENTRY      link;
+       PMDL   mdl_p;
+       u64 va;  /* virtual address of the buffer */
+       u64 size;     /* size in bytes of the buffer */
+       u32 nr_pages;
+       int     is_user;
 } mt_iobuf_seg_t;
 
+// Returns: 0 on success, -ENOMEM or -EACCESS on error
 static int register_segment(
        IN              u64 va,
        IN              u64 size,
@@ -350,42 +351,42 @@ static int register_segment(
        mt_iobuf_seg_t * new_iobuf;
        static ULONG cnt=0;
        LOCK_OPERATION Operation;
-  
-       // set Operation
-       if (acc & IB_AC_LOCAL_WRITE)
-               Operation = IoModifyAccess;
-       else
-               Operation = IoReadAccess;
-       
+
+       // set Operation
+       if (acc & IB_AC_LOCAL_WRITE)
+               Operation = IoModifyAccess;
+       else
+               Operation = IoReadAccess;
+       
        // allocate IOBUF segment object
        new_iobuf = (mt_iobuf_seg_t *)kmalloc(sizeof(mt_iobuf_seg_t), GFP_KERNEL );
-       if (new_iobuf == NULL) {
+       if (new_iobuf == NULL) {
                rc = -ENOMEM;
                goto err_nomem;
-       }
-   
-       // allocate MDL 
-       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL);
-       if (mdl_p == NULL) {
-               rc = ENOMEM;
+       }
+
+       // allocate MDL 
+       mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL);
+       if (mdl_p == NULL) {
+               rc = -ENOMEM;
                goto err_alloc_mdl;
-       }
+       }
 
-       // make context-dependent things
+       // make context-dependent things
        if (is_user) {
                ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
-       mode = UserMode;
+       mode = UserMode;
        }
        else {  /* Mapping to kernel virtual address */
                //    MmBuildMdlForNonPagedPool(mdl_p);   // fill MDL ??? - should we do that really ?
-       mode = KernelMode;
-       }
+       mode = KernelMode;
+       }
 
        __try { /* try */
-       MmProbeAndLockPages( mdl_p, mode, Operation );   /* lock memory */
-       } /* try */
+       MmProbeAndLockPages( mdl_p, mode, Operation );   /* lock memory */
+       } /* try */
                
-       __except (EXCEPTION_EXECUTE_HANDLER)    {
+       __except (EXCEPTION_EXECUTE_HANDLER)    {
                HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, 
                        ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", 
                        GetExceptionCode(), va, size));
@@ -403,11 +404,11 @@ static int register_segment(
        return 0;
 
 err_probe:
-  IoFreeMdl(mdl_p);
+       IoFreeMdl(mdl_p);
 err_alloc_mdl:  
-  ExFreePool((PVOID)new_iobuf);
+       ExFreePool((PVOID)new_iobuf);
 err_nomem:  
-  return rc;
+       return rc;
 }
 
 int iobuf_register(
@@ -417,13 +418,13 @@ int iobuf_register(
        IN              ib_access_t acc,
        IN OUT  mt_iobuf_t *iobuf_p)
 {
-  int rc=0;
-  u64 seg_va = va;     // current segment start
-  u64 seg_size = size; // current segment size
-  u64 rdc = size;                      // remain data counter - what is rest to lock
-  u64 delta;                           // he size of the last not full page of the first segment
-  unsigned page_size = PAGE_SIZE;
-  
+       int rc=0;
+       u64 seg_va;     // current segment start
+       u64 seg_size;   // current segment size
+       u64 rdc;                        // remain data counter - what is rest to lock
+       u64 delta;                              // the size of the last partially filled page of the first segment
+       unsigned page_size = PAGE_SIZE;
+
 // 32 - for any case  
 #define PFNS_IN_PAGE_SIZE_MDL          ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long))
 #define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL)     // 4MB  
@@ -431,54 +432,59 @@ int iobuf_register(
        ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
 
        // init IOBUF object
-       InitializeListHead( &iobuf_p->seg_que );
-       iobuf_p->seg_num = 0;
-       if (rdc <= 0)
-               return -EINVAL;
+       InitializeListHead( &iobuf_p->seg_que );
+       iobuf_p->seg_num = 0;
+
+       // Round the seg_va down to a page boundary so that we always get a seg_size
+       // that is an integral number of pages.
+       delta = va & (PAGE_SIZE - 1);
+       seg_va = va - delta;
+       // Since we rounded down the seg_va, we need to round up the rdc and size.
+       seg_size = rdc = size + delta;
                
-       // allocate segments
-       while (rdc > 0) {
-               // map a segment
-               rc = register_segment(seg_va, seg_size, is_user, acc, iobuf_p );
-
-               // success - move to another segment
-               if (!rc) {
-                       rdc -= seg_size;
-                       seg_va += seg_size;
-                       iobuf_p->seg_num++;
-                       if (seg_size > rdc)
-                               seg_size = rdc;
-                       continue;
+       // allocate segments
+       while (rdc > 0) {
+               // map a segment
+               rc = register_segment(seg_va, seg_size, is_user, acc, iobuf_p );
+
+               // success - move to another segment
+               if (!rc) {
+                       rdc -= seg_size;
+                       seg_va += seg_size;
+                       iobuf_p->seg_num++;
+                       if (seg_size > rdc)
+                               seg_size = rdc;
+                       continue;
                }
 
-               // failure - too large a buffer: lessen it and try once more
-               if (rc == -ENOMEM) {
-                       // no where to lessen - too low memory
-                       if (seg_size <= MIN_IOBUF_SEGMENT_SIZE)
-                               break;
-                       // lessen the size
-                       seg_size >>= 1;
-                       // round the segment size to the page boundary (only for the first segment)
-                       if (iobuf_p->seg_num == 0) {
-                               delta = (seg_va + seg_size) & (page_size - 1);
-                               seg_size -= delta;
-                               seg_size += page_size;
-                               if (seg_size > rdc)
-                                       seg_size = rdc;
-                       }
-                       continue;
+               // failure - too large a buffer: lessen it and try once more
+               if (rc == -ENOMEM) {
+                       // no where to lessen - too low memory
+                       if (seg_size <= MIN_IOBUF_SEGMENT_SIZE)
+                               break;
+                       // lessen the size
+                       seg_size >>= 1;
+                       // round the segment size to the page boundary (only for the first segment)
+                       if (iobuf_p->seg_num == 0) {
+                               delta = (seg_va + seg_size) & (page_size - 1);
+                               seg_size -= delta;
+                               seg_size += page_size;
+                               if (seg_size > rdc)
+                                       seg_size = rdc;
+                       }
+                       continue;
                }
 
-               // got unrecoverable error
+               // got unrecoverable error
                break;
        }
 
        // SUCCESS
-       if (rc) 
+       if (rc) 
                iobuf_deregister( iobuf_p );
        else     {
                // fill IOBUF object
-               iobuf_p->va = va;
+               iobuf_p->va = va;
                iobuf_p->size= size;
                iobuf_p->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size );
                iobuf_p->is_user = is_user;
@@ -490,24 +496,81 @@ int iobuf_register(
 
 static void deregister_segment(mt_iobuf_seg_t * iobuf_seg_p)
 {
-  MmUnlockPages( iobuf_seg_p->mdl_p );    // unlock the buffer 
-  IoFreeMdl( iobuf_seg_p->mdl_p );        // free MDL
-  ExFreePool(iobuf_seg_p);
+       MmUnlockPages( iobuf_seg_p->mdl_p );    // unlock the buffer 
+       IoFreeMdl( iobuf_seg_p->mdl_p );        // free MDL
+       ExFreePool(iobuf_seg_p);
 }
 
 void iobuf_deregister(mt_iobuf_t *iobuf_p)
 {
-  mt_iobuf_seg_t *iobuf_seg_p;         // pointer to current segment object
-  
-  ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
-
-  // release segments
-  while (!IsListEmpty( &iobuf_p->seg_que )) {
-       iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que );
-       deregister_segment(iobuf_seg_p);
-       iobuf_p->seg_num--;
-  }
-  ASSERT(iobuf_p->seg_num == 0);
+       mt_iobuf_seg_t *iobuf_seg_p;    // pointer to current segment object
+
+       ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
+
+       // release segments
+       while (!IsListEmpty( &iobuf_p->seg_que )) {
+               iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que );
+               deregister_segment(iobuf_seg_p);
+               iobuf_p->seg_num--;
+       }
+       ASSERT(iobuf_p->seg_num == 0);
+}
+
+void iobuf_iter_init(
+       IN              mt_iobuf_t *iobuf_p, 
+       IN OUT  mt_iobuf_iter_t *iterator_p)
+{
+       iterator_p->seg_p = iobuf_p->seg_que.Flink;
+       iterator_p->pfn_ix = 0;
+}
+
+// The function returns the physical addresses of the pages, including the first page.
+// To get the physical address of the buffer itself, one has to 
+// add the offset from the start of the page to the first physical address.
+// Returns: the number of entries filled in page_tbl_p,
+// or 0 when the iterator is already at the end of the list.
+uint32_t iobuf_get_tpt_seg(
+       IN              mt_iobuf_t *iobuf_p, 
+       IN OUT  mt_iobuf_iter_t *iterator_p,
+       IN              uint32_t n_pages_in, 
+       IN OUT  uint64_t *page_tbl_p )
+{
+       uint32_t i=0;   // has to be initialized here for a premature exit
+       mt_iobuf_seg_t *seg_p;  // pointer to current segment object 
+       PPFN_NUMBER     pfn_p; 
+       uint32_t        pfn_ix; // index of PFN in PFN array of the current segment
+       uint64_t *pa_buf_p = page_tbl_p;
+
+       // prepare to the loop
+       seg_p = iterator_p->seg_p;      // first segment of the first iobuf
+       pfn_ix= iterator_p->pfn_ix;
+
+       // check whether we are at the end of the list
+       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que)
+               goto exit;
+       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix;
+
+       // pass along all the PFN arrays
+       for (; i < n_pages_in; i++, pa_buf_p++) {
+               // convert PFN to the physical address
+               *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT;
+       
+               // get to the next PFN 
+               if (++pfn_ix >= seg_p->nr_pages) {
+                       seg_p = (mt_iobuf_seg_t*)seg_p->link.Flink;
+                       pfn_ix = 0;
+                       if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) {
+                               i++;
+                               break;
+                       }
+                       pfn_p = MmGetMdlPfnArray( seg_p->mdl_p );
+               }
+       }
+
+exit:
+       iterator_p->seg_p = seg_p;
+       iterator_p->pfn_ix = pfn_ix;
+       return i;
 }
 
 
index 89685a9..0c24911 100644 (file)
@@ -252,16 +252,22 @@ int get_user_pages(
        );
 
 typedef struct _mt_iobuf {
-  u64 va;  /* virtual address of the buffer */
-  u64 size;     /* size in bytes of the buffer */
-  LIST_ENTRY           seg_que;
-  u32 nr_pages;
-  int  is_user;
-  int                          seg_num;
+       u64 va;  /* virtual address of the buffer */
+       u64 size;     /* size in bytes of the buffer */
+       LIST_ENTRY seg_que;
+       u32 nr_pages;
+       int is_user;
+       int seg_num;
 } mt_iobuf_t;
 
+/* iterator for getting segments of tpt */
+typedef struct _mt_iobuf_iter {
+       void * seg_p;  /* the item from where to take the next translations */
+       unsigned int pfn_ix; /* index from where to take the next translation */
+} mt_iobuf_iter_t;
 
 void iobuf_deregister(mt_iobuf_t *iobuf_p);
+
 int iobuf_register(
        IN              u64 va,
        IN              u64 size,
@@ -269,6 +275,15 @@ int iobuf_register(
        IN              ib_access_t acc,
        IN OUT  mt_iobuf_t *iobuf_p);
 
+void iobuf_iter_init(
+       IN              mt_iobuf_t *iobuf_p, 
+       IN OUT  mt_iobuf_iter_t *iterator_p);
+
+uint32_t iobuf_get_tpt_seg(
+       IN              mt_iobuf_t *iobuf_p, 
+       IN OUT  mt_iobuf_iter_t *iterator_p,
+       IN              uint32_t n_pages_in, 
+       IN OUT  uint64_t *page_tbl_p );
 
 unsigned long copy_from_user(void *to, const void *from, unsigned long n);
 unsigned long copy_to_user(void *to, const void *from, unsigned long n);
diff --git a/hw/mthca/kernel/mt_uverbsmem.c b/hw/mthca/kernel/mt_uverbsmem.c
deleted file mode 100644 (file)
index 8ca9649..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: uverbs_mem.c 2783 2005-07-05 02:21:08Z roland $
- */
-
-#include "ib_verbs.h"
-
-#if defined(EVENT_TRACING)
-#ifdef offsetof
-#undef offsetof
-#endif
-#include "mt_uverbsmem.tmh"
-#endif
-
-void ibv_umem_release(struct ib_device *dev, struct ib_umem *umem)
-{
-       struct ib_umem_chunk *chunk, *tmp;
-       int i;
-
-       list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list,struct ib_umem_chunk,struct ib_umem_chunk) {
-               pci_unmap_sg((struct mthca_dev *)dev, chunk->page_list,
-                            chunk->nents, PCI_DMA_BIDIRECTIONAL);
-               for (i = 0; i < chunk->nents; ++i) {
-                       put_page(&chunk->page_list[i]);
-               }
-               kfree(chunk);
-       }
-}
-
-int ibv_umem_get(struct ib_device *dev, struct ib_umem *mem,
-               void *addr, size_t size, int write)
-{
-       struct ib_umem_chunk *chunk = NULL, *last_chunk;
-       u64 cur_base;
-       unsigned long npages;
-       int ret = -ENOMEM;
-       int i;
-
-       HCA_ENTER(HCA_DBG_MEMORY);
-       /* fill mem */
-       mem->user_base = (u64)(UINT_PTR)addr;
-       mem->length    = size;
-       mem->offset    = (int)(((u64)(UINT_PTR) addr) & ~PAGE_MASK);
-       mem->page_size = PAGE_SIZE;
-       mem->writable  = write;
-       INIT_LIST_HEAD(&mem->chunk_list);
-
-       /* build sg list */
-       npages = (unsigned long)(NEXT_PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT);
-       if (!npages) {
-               ret = -EINVAL;
-               goto err_inval;
-       }
-       cur_base = (u64)(UINT_PTR)addr & PAGE_MASK;
-       while (npages) {
-               /* allocate a max large chunk (it's <= PAGE_SIZE) */
-               chunk = kzalloc(sizeof *chunk + sizeof (struct scatterlist) *
-                               IB_UMEM_MAX_PAGE_CHUNK, GFP_KERNEL);
-               if (!chunk) 
-                       goto out;
-               list_add_tail(&chunk->list, &mem->chunk_list);
-
-               /* fill the chunk */
-               for (i=0; i < (int)IB_UMEM_MAX_PAGE_CHUNK; i++) {
-
-                       /* map a one page */
-                       ret = get_user_pages((struct mthca_dev *)dev, cur_base,
-                               1, write, &chunk->page_list[i] );
-                       if (ret < 0)
-                               goto out;                                            
-
-                       /* update the chunk */
-                       chunk->nents++; /* number of sg elements */
-
-                       /* calculate the rest of the buffer to handle */
-                       cur_base += PAGE_SIZE;
-                       if (!--npages)
-                               break;
-               }
-
-               /* map all chunk pages */
-               chunk->nmap = pci_map_sg((struct mthca_dev *)dev,
-                       chunk->page_list, chunk->nents, PCI_DMA_BIDIRECTIONAL);
-               if (chunk->nmap <= 0) 
-                       goto out;
-
-       }
-
-       /* shorten the last chunk */
-       ret = 0; /* if we get here - all is OK */
-       last_chunk = chunk;
-       chunk = kzalloc(sizeof *chunk + sizeof (struct scatterlist) *
-                       chunk->nents, GFP_KERNEL);
-       if (!chunk) 
-               goto err_kmalloc;
-       memcpy( chunk, last_chunk, sizeof *last_chunk + sizeof (struct scatterlist) *
-               last_chunk->nents);
-       list_del(&last_chunk->list);
-       list_add_tail(&chunk->list, &mem->chunk_list);
-       kfree(last_chunk);
-       goto exit;
-       
-out:
-       ibv_umem_release(dev, mem);
-err_kmalloc: err_inval:
-exit:
-       return ret;
-}
-
-
index 1ec487a..d358403 100644 (file)
@@ -209,16 +209,36 @@ static int poll_go_bit(struct mthca_dev *dev)
 */
 static int wait_go_bit(struct mthca_dev *dev, unsigned long timeout_usecs)
 {
+#ifdef USE_FAIR_GO_BIT_POLLING 
+//
+// the algorithm polls 'go bit'  N_POLL_TRIES times with a polling interval,
+// increasing from 0 to MAX_POLL_INTERVAL with step of POLL_INTERVAL_DELTA
+//
+// The values of the above constants were chosen arbitrarily.
+// They require eventual tuning, which is why the algorithm is disabled for now.
+
+               int i = 0;
+#define POLL_INTERVAL_DELTA            5 *(-10)        // 5 usec (in 100-ns units, negative = relative)
+#define MAX_POLL_INTERVAL                      200 *(-10)      // 200 usec
+#define N_POLL_TRIES                           40
+#endif
        u64 start, end;
        LARGE_INTEGER  interval;
 
        if (!go_bit(dev))       return 0;
 
-       interval.QuadPart = -(__int64)(((u64)(timeout_usecs) * 10) /    CMD_POLL_N_TRIES);
+       interval.QuadPart = 0;
        start = cl_get_time_stamp();
        end = start + timeout_usecs;
        while (go_bit(dev) && (cl_get_time_stamp() < end)) {
                KeDelayExecutionThread( KernelMode, FALSE, &interval );
+#ifdef USE_FAIR_GO_BIT_POLLING 
+               if (++i >= N_POLL_TRIES) {
+                       if ( (__int64)interval.QuadPart > (__int64)MAX_POLL_INTERVAL)
+                               interval.QuadPart += POLL_INTERVAL_DELTA;
+                       i = 0;
+               }
+#endif         
        }
 
        if (!go_bit(dev))       return 0;
index ca60593..5c36da1 100644 (file)
 #include "mthca_cmd.h"
 #include "mthca_memfree.h"
 
- void ibv_umem_release(struct ib_device *dev, struct ib_umem *umem);
- int ibv_umem_get(struct ib_device *dev, struct ib_umem *mem,
-                void *addr, size_t size, int write);
  static void init_query_mad(struct ib_smp *mad)
  {
         mad->base_version      = 1;
@@ -996,21 +992,27 @@ struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd,
        void* __ptr64   vaddr, uint64_t length, uint64_t hca_va, mthca_qp_access_t acc)
 {
        struct mthca_dev *dev = to_mdev(pd->device);
-       struct ib_umem_chunk *chunk;
        struct mthca_mr *mr;
-       struct ib_umem *region;
        u64 *pages;
-       int shift, n, len;
-       int i, j, k;
        int err = 0;
+       uint32_t i, n;
+       mt_iobuf_t *iobuf_p;
+       mt_iobuf_iter_t iobuf_iter;
+
+       /*
+        * Be friendly to WRITE_MTT command and leave two 
+        * empty slots for the  index and reserved fields of the mailbox.
+        */
+       int max_buf_list_size = PAGE_SIZE / sizeof (u64) - 2;
 
        HCA_ENTER(HCA_DBG_MEMORY);
+
+
        mr = kzalloc(sizeof *mr, GFP_KERNEL);
        if (!mr) {
                err = -ENOMEM;
                goto err_nomem;
        }
-       region = &mr->umem;
 
        /*
         * We ask for writable memory if any access flags other than
@@ -1019,64 +1021,53 @@ struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd,
         * things like fetch and add, which will modify memory, and
         * "MW bind" can change permissions by binding a window.
         */
-       err = ibv_umem_get(pd->device, region,
-                         (void *)vaddr, (size_t)length,
-                         !!(acc & ~MTHCA_ACCESS_REMOTE_READ));
-       if (err)
-               goto err_umem_get;
-
-       region->virt_base = hca_va;     /* va in HCA */
 
-       n = 0;
-       shift = ffs(region->page_size) - 1;
-       list_for_each_entry(chunk, &region->chunk_list, list,struct ib_umem_chunk)
-               n += chunk->nents;
+       // try register the buffer
+       iobuf_p = &mr->iobuf;
+       err =  iobuf_register( (u64)vaddr, length, TRUE, 
+               (acc & ~MTHCA_ACCESS_REMOTE_READ) ? IB_AC_LOCAL_WRITE : 0, iobuf_p );
+       if (err)
+               goto err_reg_mem;
 
-       mr->mtt = mthca_alloc_mtt(dev, n);
+       // allocate MTT's
+       mr->mtt = mthca_alloc_mtt(dev, iobuf_p->nr_pages);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
                goto err_alloc_mtt;
        }
 
+       // allocate buffer_list for writing MTT's
        pages = (u64 *) kmalloc(PAGE_SIZE,GFP_KERNEL);
        if (!pages) {
                err = -ENOMEM;
                goto err_pages;
        }
 
-       i = n = 0;
-
-       list_for_each_entry(chunk, &region->chunk_list, list,struct ib_umem_chunk)
-               for (j = 0; j < chunk->nmap; ++j) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> shift;
-                       for (k = 0; k < len; ++k) {
-                               pages[i++] = sg_dma_address(&chunk->page_list[j]) +
-                                       region->page_size * k;
-                               /*
-                                * Be friendly to WRITE_MTT command
-                                * and leave two empty slots for the
-                                * index and reserved fields of the
-                                * mailbox.
-                                */
-                               if (i == PAGE_SIZE / sizeof (u64) - 2) {
-                                       err = mthca_write_mtt(dev, mr->mtt,
-                                                             n, pages, i);
-                                       if (err)
-                                               goto err_write_mtt;
-                                       n += i;
-                                       i = 0;
-                               }
-                       }
-               }
+       // write MTT's
+       iobuf_iter_init( iobuf_p, &iobuf_iter );
+       n = 0;
+       for (;;) {
+               // get up to  max_buf_list_size page physical addresses
+               i = iobuf_get_tpt_seg( iobuf_p, &iobuf_iter, max_buf_list_size, pages );
+               if (!i)
+                       break;
 
-       if (i) {
+               //TODO: convert physical addresses to DMA addresses
+
+               // write 'i' dma addresses
                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
                if (err)
                        goto err_write_mtt;
-       }       
+               n += i;
+               if (n >= iobuf_p->nr_pages)
+                       break;
+       }
 
-       err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
-                            region->length, map_qp_mpt(acc), mr);
+       CL_ASSERT(n == iobuf_p->nr_pages);
+       
+       // write MPT
+       err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, PAGE_SHIFT, hca_va,
+               length, map_qp_mpt(acc), mr);
        if (err)
                goto err_mt_alloc;
 
@@ -1101,6 +1092,7 @@ struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd,
        }
 done:  
        free_page((void*) pages);
+
        HCA_EXIT(HCA_DBG_MEMORY);
        return &mr->ibmr;
 
@@ -1111,10 +1103,11 @@ err_write_mtt:
 err_pages:
        mthca_free_mtt(dev, mr->mtt);
 err_alloc_mtt:
-       ibv_umem_release(pd->device, region);
-err_umem_get:  
+       iobuf_deregister(iobuf_p);
+err_reg_mem:   
        kfree(mr);
 err_nomem:     
+
        HCA_EXIT(HCA_DBG_MEMORY);
        return ERR_PTR(err);
 }
@@ -1122,9 +1115,11 @@ err_nomem:
 int mthca_dereg_mr(struct ib_mr *mr)
 {
        struct mthca_mr *mmr = to_mmr(mr);
-       mthca_free_mr(to_mdev(mr->device), mmr);
+       struct mthca_dev* dev = to_mdev(mr->device);
+
+       mthca_free_mr(dev, mmr);
        if (mr->pd->ucontext) {
-               ibv_umem_release(mr->pd->device, &mmr->umem);
+               iobuf_deregister(&mmr->iobuf);
                MmUnsecureVirtualMemory ( mmr->secure_handle );
        }
        kfree(mmr);
index bc1523f..91a2d8b 100644 (file)
@@ -77,7 +77,7 @@ struct mthca_mr {
        //NB: the structure was not inserted here for not to mix driver and provider structures
        struct ib_mr      ibmr;
        struct mthca_mtt *mtt;
-       struct ib_umem umem;
+       mt_iobuf_t      iobuf;
        void *secure_handle;
 };
 
index 1fd54f7..b9734df 100644 (file)
@@ -1516,6 +1516,10 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr,
 
        ind = qp->sq.next_ind;
 
+       /* prepare fence bit for the doorbell */
+       if (wr->send_opt & IB_SEND_OPT_FENCE)
+               f0 = 1 << 5;
+
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
                if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                        HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail,"
@@ -1867,6 +1871,10 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr,
 
        ind = qp->sq.head & (qp->sq.max - 1);
 
+       /* prepare fence bit for the doorbell */
+       if (wr->send_opt & IB_SEND_OPT_FENCE)
+               f0 = 1 << 5;
+
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
                if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
                        nreq = 0;
diff --git a/hw/mthca/mt_utils.c b/hw/mthca/mt_utils.c
new file mode 100644 (file)
index 0000000..586014e
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: hca_driver.c 46 2005-05-30 17:55:53Z sleybo $
+ */
+
+#include <mt_utils.h>
+
+/* Nth element of the table contains the index of the first set bit of N; 8 - for N=0 */
+char g_set_bit_tbl[256];
+
+/* Nth element of the table contains the index of the first 0 bit of N; 8 - for N=255 */
+char g_clr_bit_tbl[256];
+
+/*
+ * Fills g_set_bit_tbl and g_clr_bit_tbl: entry N receives the 0-based index
+ * of the lowest set (resp. lowest cleared) bit of N; the value 8 marks
+ * "no such bit inside the byte".
+ */
+void fill_bit_tbls()
+{
+       unsigned long val = 0;
+
+       while (val < 256) {
+               g_set_bit_tbl[val] = (char)(_ffs_raw(&val,0) - 1);
+               g_clr_bit_tbl[val] = (char)(_ffz_raw(&val,0) - 1);
+               ++val;
+       }
+
+       // 0 has no set bit and 255 has no cleared bit within a byte
+       g_clr_bit_tbl[255] = 8;
+       g_set_bit_tbl[0] = 8;
+}
+
+
diff --git a/hw/mthca/mt_utils.h b/hw/mthca/mt_utils.h
new file mode 100644 (file)
index 0000000..8f0b563
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: hca_driver.c 46 2005-05-30 17:55:53Z sleybo $
+ */
+
+
+#ifndef MT_UTILS_H
+#define MT_UTILS_H
+
+// Nth element of the table contains the index of the first set bit of N; 8 - for N=0
+extern char g_set_bit_tbl[256];
+// Nth element of the table contains the index of the first cleared bit of N; 8 - for N=255
+extern char g_clr_bit_tbl[256];
+
+// DECLARE_BITMAP
+// width, in bits, of one bitmap word as assumed by the helpers below
+#define BITS_PER_LONG          32
+// number of BITS_PER_LONG-bit words needed to hold 'bits' bits (rounded up)
+#define BITS_TO_LONGS(bits) \
+        (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+
+/*
+* fls: find last bit set.
+* @x: the value to examine; treated as a 32-bit pattern
+*
+* returns: 0 - if not found or N+1, if found Nth bit
+*/
+
+static __inline int fls(int x)
+{
+       // work on an unsigned copy: left-shifting a signed int into
+       // the sign bit is undefined behaviour
+       unsigned int v = (unsigned int)x;
+       int r = 32;
+
+       if (!v)
+               return 0;
+       // binary search: when the upper half is empty, move the lower half up
+       if (!(v & 0xffff0000u)) {
+               v <<= 16;
+               r -= 16;
+       }
+       if (!(v & 0xff000000u)) {
+               v <<= 8;
+               r -= 8;
+       }
+       if (!(v & 0xf0000000u)) {
+               v <<= 4;
+               r -= 4;
+       }
+       if (!(v & 0xc0000000u)) {
+               v <<= 2;
+               r -= 2;
+       }
+       if (!(v & 0x80000000u)) {
+               v <<= 1;
+               r -= 1;
+       }
+       return r;
+}
+
+/**
+* _ffs_raw - find the first one bit in a word
+* @addr: The address to start the search at
+* @offset: The bitnumber to start searching at
+*
+* Only bits offset..BITS_PER_LONG-1 of *addr are examined.
+*
+* returns: 0 - if not found or N+1, if found Nth bit
+*/
+static __inline int _ffs_raw(const unsigned long *addr, int offset)
+{
+       //TODO: not an effective code - is better in Assembler
+       // the mask is unsigned: a signed 'int' mask overflows when it reaches
+       // bit 31 and gets sign-extended when ANDed with a wider 'unsigned long'
+       unsigned long mask;
+       int rbc;
+       int ix;
+       if (!*addr) return 0;
+       mask = 1UL << offset;
+       rbc = BITS_PER_LONG - offset;
+       for (ix=0; ix<rbc; ix++, mask<<=1) {
+               if (*addr & mask)
+                       return offset + ix + 1;
+       }
+       return 0;
+}
+
+// same contract as _ffs_raw() with offset = 0, but table-driven:
+// steps to the first non-zero byte and looks its lowest set bit up in g_set_bit_tbl
+// (the byte-offset arithmetic assumes the least significant byte is stored first)
+static __inline int _ffs(const unsigned long *addr)
+{
+       unsigned char *start = (unsigned char *)addr;
+       unsigned char *cur = start;
+
+       if (*addr == 0)                                 // nothing is set in the whole dword
+               return 0;
+       if (*(short*)cur == 0)                          // first word is empty - step over it
+               cur += 2;
+       if (*(char*)cur == 0)                           // first byte of that word is empty - step over it
+               cur += 1;
+       return (int)(((cur - start) << 3) + g_set_bit_tbl[*cur] + 1);
+}
+
+
+#define ffs(val)       _ffs((const unsigned long *)&val)
+
+/**
+* _ffz_raw - find the first zero bit in a word
+* @addr: The address to start the search at
+* @offset: The bitnumber to start searching at
+*
+* Only bits offset..BITS_PER_LONG-1 of *addr are examined.
+*
+* returns: 0 - if not found or N+1, if found Nth bit
+*/
+static __inline int _ffz_raw(const unsigned long *addr, int offset)
+{
+       //TODO: not an effective code - is better in Assembler
+       // the mask is unsigned: a signed 'int' mask overflows when it reaches
+       // bit 31 and gets sign-extended when ANDed with a wider 'unsigned long'
+       unsigned long mask;
+       int rbc;
+       int ix;
+       if (!~*addr) return 0;
+       mask = 1UL << offset;
+       rbc = BITS_PER_LONG - offset;
+       for (ix=0; ix<rbc; ix++, mask<<=1) {
+               if (!(*addr & mask))
+                       return offset + ix + 1;
+       }
+       return 0;
+}
+
+
+// same contract as _ffz_raw() with offset = 0, but table-driven:
+// steps to the first byte that has a zero bit and looks that bit up in g_clr_bit_tbl
+static __inline int _ffz(const unsigned long *addr)
+{
+       unsigned char *ptr = (unsigned char *)addr;
+       if (!~*addr) return 0;                                  // dword has no zero bits
+       // compare as unsigned values: '!~*(char*)ptr' recognizes 0xFF only
+       // when plain 'char' is signed, which is implementation-defined
+       if (*(unsigned short*)ptr == 0xFFFF) ptr += 2;          // skip the word w/o zero bits
+       if (*ptr == 0xFF) ptr++;                                // skip the byte w/o zero bits
+       return (int)(((ptr - (unsigned char *)addr ) << 3) + g_clr_bit_tbl[*ptr] + 1);
+}
+
+#define ffz(val)       _ffz((const unsigned long *)&val)
+
+// Function:
+//     finds the first bit, set in the bitmap
+// Parameters:
+//     addr            - address of the bitmap
+//     bits_size       - the size in bits
+// Returns:
+//     the index of the first bit set; 'bits_size' - when there is none
+// Notes:
+//     presumes, that addr is aligned on dword
+//     presumes, that the map contains an integer number of dwords
+//     on bits_size=0 will return 0, but it's an illegal case
+//
+static __inline int find_first_bit(const unsigned long *addr, unsigned bits_size)
+{
+       unsigned char *map_start = (unsigned char *)addr;
+       unsigned char *map_end = (unsigned char *)(addr + BITS_TO_LONGS(bits_size));
+       unsigned char *cur;
+
+       for (cur = map_start; cur < map_end; cur += 4) {
+               if (*(int*)cur == 0)
+                       continue;                               // empty dword - take the next one
+               if (*(short*)cur == 0)
+                       cur += 2;                               // first word is empty - the bit is in the second one
+               if (*(char*)cur == 0)
+                       cur++;                                  // first byte is empty - the bit is in the next one
+               return (int)(((cur - map_start) << 3) + g_set_bit_tbl[*cur]);
+       }
+       return bits_size;
+}
+
+// Function:
+//     finds the first zero bit in the bitmap
+// Parameters:
+//     addr            - address of the bitmap
+//     bits_size       - the size in bits
+// Returns:
+//     the index of the first zero bit; 'bits_size' - when there is none
+// Notes:
+//     presumes, that the map contains an integer number of dwords
+//
+static __inline int find_first_zero_bit(const unsigned long *addr, unsigned bits_size)
+{
+       unsigned char *ptr = (unsigned char *)addr;             // bitmap start
+       unsigned char *end_ptr = (unsigned char *)(addr + BITS_TO_LONGS(bits_size));    // bitmap end
+
+       while (ptr<end_ptr) {
+               if (!~*(int*)ptr) { ptr += 4; continue; }       // skip dword w/o zero bits
+               // compare as unsigned values: '!~*(char*)ptr' recognizes 0xFF only
+               // when plain 'char' is signed, which is implementation-defined
+               if (*(unsigned short*)ptr == 0xFFFF) ptr += 2;  // get to the word with zero bits
+               if (*ptr == 0xFF) ptr++;                        // get to the byte with zero bits
+               return (int)(((ptr - (unsigned char *)addr ) << 3) + g_clr_bit_tbl[*ptr]);
+       }
+       return bits_size;
+}
+
+
+/**
+* find_next_zero_bit - find the first zero bit in a memory region
+* @addr: The address to base the search on
+* @bits_size: The maximum size to search
+* @offset: The bitnumber to start searching at
+*
+* Returns the bit-number of the first zero bit at or after 'offset', counted
+* from the start of the map (not the number of the byte containing the bit).
+* If not found - returns 'bits_size'.
+* Like find_first_zero_bit, presumes that the map contains an integer number
+* of dwords.
+*/
+static __inline int find_next_zero_bit(const unsigned long *addr, int bits_size, int offset)
+{
+       int res;
+       int ix = offset % BITS_PER_LONG;
+       int w_offset = offset / BITS_PER_LONG;
+       int base = w_offset * BITS_PER_LONG;    // map-relative number of bit 0 of the current word
+
+       // nothing left to search
+       if (offset >= bits_size)
+               return bits_size;
+
+       // search in the first word while we are in the middle
+       if (ix) {
+               res = _ffz_raw(addr + w_offset, ix);
+               if (res)
+                       return base + res - 1;  // _ffz_raw is word-relative and 1-based
+               // continue from the word following the one that holds 'offset'
+               ++w_offset;
+               base += BITS_PER_LONG;
+               if (base >= bits_size)
+                       return bits_size;
+       }
+
+       // search the remaining whole words; the result is relative to 'base',
+       // and "not found" (bits_size - base) maps back to bits_size
+       res = find_first_zero_bit( addr + w_offset, bits_size - base );
+       return res + base;
+}
+
+void fill_bit_tbls();
+
+#endif
+
index 5ff4f7b..4e362d5 100644 (file)
@@ -19,6 +19,9 @@ ENABLE_EVENT_TRACING=1
 !endif\r
 \r
 SOURCES= \\r
+                                       \\r
+       ..\mt_utils.c           \\r
+                                       \\r
        mlnx_uvp.rc \\r
        mlnx_ual_av.c \\r
        mlnx_ual_ca.c \\r
index fd65af0..591644e 100644 (file)
@@ -32,6 +32,7 @@
 \r
 #include <tchar.h>\r
 #include <stdlib.h>\r
+#include <mt_l2w.h>\r
 #include "mlnx_ual_main.h"\r
 \r
 #if defined(EVENT_TRACING)\r
 \r
 uint32_t       mlnx_dbg_lvl = 0; // MLNX_TRACE_LVL_8;\r
 \r
-\r
 static void uvp_init();\r
 \r
-\r
 extern BOOL APIENTRY\r
 _DllMainCRTStartupForGS(\r
        IN                              HINSTANCE                                       h_module,\r
@@ -74,7 +73,8 @@ DllMain(
                        return FALSE;\r
                }\r
 \r
-               //uvp_init();\r
+               fill_bit_tbls();\r
+               uvp_init();\r
                break;\r
 \r
         case DLL_PROCESS_DETACH:\r
index b09ce3f..b382166 100644 (file)
@@ -152,22 +152,22 @@ mlnx_poll_cq (
                OUT                     ib_wc_t**       const                   pp_done_wclist )\r
 {\r
        int err;\r
-    ib_api_status_t status = IB_SUCCESS;\r
-    mlnx_ual_hobul_t *p_hobul;\r
-    mlnx_ual_cq_info_t *p_cq_info = (mlnx_ual_cq_info_t *)((void*) h_cq);\r
+       ib_api_status_t status = IB_SUCCESS;\r
+       mlnx_ual_hobul_t *p_hobul;\r
+       mlnx_ual_cq_info_t *p_cq_info = (mlnx_ual_cq_info_t *)((void*) h_cq);\r
 \r
-    UVP_ENTER(UVP_DBG_CQ);\r
-    CL_ASSERT (p_cq_info);\r
+       UVP_ENTER(UVP_DBG_CQ);\r
+       CL_ASSERT (p_cq_info);\r
 \r
-    p_hobul = (mlnx_ual_hobul_t *) p_cq_info->p_hobul;\r
-    CL_ASSERT (p_hobul);\r
+       p_hobul = (mlnx_ual_hobul_t *) p_cq_info->p_hobul;\r
+       CL_ASSERT (p_hobul);\r
 \r
-    if (!pp_free_wclist || !*pp_free_wclist || !pp_done_wclist)\r
-    {\r
-        UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ ,("Passed in bad params\n")); \r
-        status = IB_INVALID_PARAMETER;\r
+       if (!pp_free_wclist || !*pp_free_wclist || !pp_done_wclist)\r
+       {\r
+               UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ ,("Passed in bad params\n")); \r
+               status = IB_INVALID_PARAMETER;\r
                goto err_invalid_params;\r
-    }\r
+       }\r
 \r
        err = p_hobul->ibv_ctx->ops.poll_cq_list(p_cq_info->ibv_cq, pp_free_wclist, pp_done_wclist );\r
        if (err) {\r
index 53d35a4..9d13669 100644 (file)
@@ -165,7 +165,7 @@ static void dump_cqe(uint32_t print_lvl, void *cqe_ptr)
        int i;
        (void) cqe;     /* avoid warning if mthca_dbg compiled away... */
 
-       UVP_PRINT(print_lvl,UVP_DBG_CQ,("CQE content \n "));
+       UVP_PRINT(print_lvl,UVP_DBG_CQ,("CQE content \n"));
        UVP_PRINT(print_lvl,UVP_DBG_CQ,(" [%2x] %08x %08x %08x %08x \n",0
                , cl_ntoh32(cqe[0]), cl_ntoh32(cqe[1]), cl_ntoh32(cqe[2]), cl_ntoh32(cqe[3])));
        UVP_PRINT(print_lvl,UVP_DBG_CQ,(" [%2x] %08x %08x %08x %08x\n",16
index 04c17c4..9f9f771 100644 (file)
@@ -165,6 +165,11 @@ int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
                        *bad_wr = wr;
                goto err_busy;
        }
+
+       /* prepare fence bit for the doorbell */
+       if (wr->send_opt & IB_SEND_OPT_FENCE)
+               f0 = 1 << 5;
+
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
 
                if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
@@ -549,6 +554,11 @@ int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
                        *bad_wr = wr;
                goto err_busy;
        }
+
+       /* prepare fence bit for the doorbell */
+       if (wr->send_opt & IB_SEND_OPT_FENCE)
+               f0 = 1 << 5;
+
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
                if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
                        nreq = 0;
index e9a572a..9f204d2 100644 (file)
@@ -13,6 +13,7 @@
 #include <errno.h>
 #include <complib/cl_memory.h>
 //#include <malloc.h>
+#include <mt_utils.h>
 
 
 // ===========================================
 // - error can't be bigger than 1000
 #define IS_ERR(ptr)                            ((ULONG_PTR)ptr > (ULONG_PTR)-1000L)
 
-//-------------------------------------------------------
-// from mt_bitmap.h
-
-#define BITS_PER_LONG          32
-#define BITS_TO_LONGS(bits) \
-       (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-
-/**
-* _ffs - find the first one bit in a word
-* @addr: The address to start the search at
-* @offset: The bitnumber to start searching at
-*
-* returns: 0 - if not found or N+1, if found Nth bit
-*/
-static inline int _ffs(const unsigned long *addr, int offset)
-{
-       //TODO: not an effective code - is better in Assembler
-       int mask = 1 << offset;
-       int rbc = BITS_PER_LONG - offset;
-       int ix;
-       for (ix=0; ix<rbc; ix++, mask<<=1) {
-               if (*addr & mask)
-                       return offset + ix + 1;
-       }
-       return 0;
-}
-
-#define ffs(val)               _ffs(&val,0)
-#define ffsl(val)      ffs(val)
+#define ffsl(val)              ffs(val)
 
 extern size_t g_page_size;