[WSD] Fix MR caching to properly flush both local and RDMA registrations.
author ftillier <ftillier@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Fri, 7 Oct 2005 18:39:18 +0000 (18:39 +0000)
committer ftillier <ftillier@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Fri, 7 Oct 2005 18:39:18 +0000 (18:39 +0000)
Signed-off-by: Fab Tillier (ftillier@silverstorm.com)
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@102 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

ulp/wsd/user/extensions.c
ulp/wsd/user/ibsp_iblow.c
ulp/wsd/user/ibsp_mem.c
ulp/wsd/user/ibsp_pnp.c
ulp/wsd/user/ibspdll.c
ulp/wsd/user/ibspproto.h
ulp/wsd/user/ibspstruct.h
ulp/wsd/user/sockinfo.c

index 83f95c6..3de9b07 100644 (file)
@@ -87,7 +87,7 @@ IBSPRegisterMemory(
                return NULL;\r
        }\r
 \r
-       node = ib_register_memory( &socket_info->buf_mem_list, socket_info->hca_pd,\r
+       node = ibsp_reg_mem( socket_info, socket_info->hca_pd,\r
                lpBuffer, dwBufferLength, access_ctrl, lpErrno );\r
 \r
        fzprint(("%s():%d:0x%x:0x%x: registering MEM from %p to %p, len %d, handle %p\n",\r
@@ -98,7 +98,7 @@ IBSPRegisterMemory(
        if( node == NULL )\r
        {\r
                IBSP_ERROR_EXIT(\r
-                       ("ib_register_memory failed (pd=%p)\n", socket_info->hca_pd) );\r
+                       ("ibsp_reg_mem failed (pd=%p)\n", socket_info->hca_pd) );\r
                *lpErrno = WSAENOBUFS;\r
        }\r
        else\r
@@ -138,8 +138,7 @@ IBSPDeregisterMemory(
                return SOCKET_ERROR;\r
        }\r
 \r
-       ret = ib_deregister_memory( &socket_info->buf_mem_list, node, lpErrno );\r
-\r
+       ret = ibsp_dereg_mem( socket_info, node, lpErrno );\r
 \r
        fzprint(("%s():%d:0x%x:0x%x: unregistering MEM %p, mr_num=%d, ret=%d\n",\r
                         __FUNCTION__,\r
@@ -147,7 +146,6 @@ IBSPDeregisterMemory(
                         GetCurrentThreadId(), node, g_ibsp.mr_num, ret));\r
 \r
        IBSP_EXIT( IBSP_DBG_MEM );\r
-\r
        return ret;\r
 }\r
 \r
@@ -225,12 +223,13 @@ IBSPRegisterRdmaMemory(
 \r
        hca = socket_info->port->hca;\r
 \r
-       node = ib_register_memory( &hca->rdma_mem_list, hca->pd,\r
+       /** TODO: Fix locking so we don't dereference node outside of mutex. */\r
+       node = ibsp_reg_mem( socket_info, hca->pd,\r
                lpBuffer, dwBufferLength, access_ctrl, lpErrno );\r
 \r
-       if( node == NULL )\r
+       if( !node )\r
        {\r
-               IBSP_ERROR_EXIT( ("ib_register_memory failed %d\n", *lpErrno) );\r
+               IBSP_ERROR_EXIT( ("ibsp_reg_mem failed %d\n", *lpErrno) );\r
                *lpErrno = WSAENOBUFS;\r
                return SOCKET_ERROR;\r
        }\r
@@ -238,15 +237,16 @@ IBSPRegisterRdmaMemory(
        desc = lpRdmaBufferDescriptor;\r
 \r
        desc->iova = (uint64_t) (uintptr_t) lpBuffer;\r
-       desc->lkey = node->lkey;\r
-       desc->rkey = node->rkey;\r
+       desc->lkey = node->p_reg->lkey;\r
+       desc->rkey = node->p_reg->rkey;\r
        desc->node = node;\r
 \r
        *lpErrno = 0;\r
 \r
-       fzprint(("%s(): registering RDMA MEM from %p to %p, len %d, handle %p\n",\r
-                        __FUNCTION__, lpBuffer, (unsigned char *)lpBuffer + dwBufferLength,\r
-                        dwBufferLength, node));\r
+       IBSP_TRACE1( IBSP_DBG_MEM,\r
+               ("Socket %p registered RDMA MEM at %p, len %d, for access %d, "\r
+               "returning handle %p, rkey %08x\n",\r
+               s, lpBuffer, dwBufferLength, dwFlags, node, desc->rkey));\r
 \r
        IBSP_EXIT( IBSP_DBG_MEM );\r
 \r
@@ -267,7 +267,6 @@ IBSPDeregisterRdmaMemory(
        struct rdma_memory_desc *desc;\r
        struct ibsp_socket_info *socket_info = (struct ibsp_socket_info *)s;\r
        int ret;\r
-       struct ibsp_hca *hca;\r
 \r
        IBSP_ENTER( IBSP_DBG_MEM );\r
 \r
@@ -283,7 +282,6 @@ IBSPDeregisterRdmaMemory(
                return SOCKET_ERROR;\r
        }\r
 \r
-\r
        CL_ASSERT( lpRdmaBufferDescriptor );\r
 \r
        if( dwDescriptorLength < sizeof(struct rdma_memory_desc) )\r
@@ -296,15 +294,13 @@ IBSPDeregisterRdmaMemory(
 \r
        desc = lpRdmaBufferDescriptor;\r
 \r
-       hca = socket_info->port->hca;\r
-       ret = ib_deregister_memory( &hca->rdma_mem_list, desc->node, lpErrno );\r
+       ret = ibsp_dereg_mem( socket_info, desc->node, lpErrno );\r
 \r
        fzprint(("%s():%d:0x%x:0x%x: Unregistering RDMA MEM %p\n",\r
                         __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
                         GetCurrentThreadId(), desc->node));\r
 \r
        IBSP_EXIT( IBSP_DBG_MEM );\r
-\r
        return ret;\r
 }\r
 \r
@@ -414,7 +410,8 @@ do_rdma_op(
        {\r
                local_ds[ds_idx].vaddr = (uint64_t)(void* __ptr64)lpBuffers[ds_idx].buf;\r
                local_ds[ds_idx].length = lpBuffers[ds_idx].len;\r
-               local_ds[ds_idx].lkey = ((struct memory_node*)lpBuffers[ds_idx].handle)->lkey;\r
+               local_ds[ds_idx].lkey =\r
+                       ((struct memory_node*)lpBuffers[ds_idx].handle)->p_reg->lkey;\r
 \r
                lpOverlapped->InternalHigh += lpBuffers[ds_idx].len;\r
        }\r
@@ -476,9 +473,10 @@ do_rdma_op(
                *lpErrno = WSA_IO_PENDING;\r
 \r
                IBSP_TRACE1( IBSP_DBG_IO,\r
-                       ("Posted RDMA: socket=%p, ov=%p, type=%d, local=%p dest=%I64x, len=%d\n",\r
-                       s, lpOverlapped, wr_type, lpBuffers[0].buf,\r
-                       send_wr.remote_ops.vaddr, lpBuffers[0].len) );\r
+                       ("Posted RDMA: socket=%p, ov=%p, type=%d, local=%p, len=%d, "\r
+                       "dest=%016I64x, rkey=%08x\n",\r
+                       s, lpOverlapped, wr_type, lpBuffers[0].buf, lpBuffers[0].len,\r
+                       send_wr.remote_ops.vaddr, send_wr.remote_ops.rkey) );\r
 \r
                fzprint(("posted RDMA %p, len=%d, op=%d, mr handle=%p\n",\r
                                lpOverlapped, lpBuffers[0].len, wr_type, node));\r
@@ -626,12 +624,22 @@ IBSPMemoryRegistrationCacheCallback(
        IN                              SIZE_T                                          Size,\r
                OUT                     LPINT                                           lpErrno )\r
 {\r
+       cl_list_item_t          *p_item;\r
+\r
        IBSP_ENTER( IBSP_DBG_MEM );\r
 \r
-       UNUSED_PARAM( lpvAddress );\r
-       UNUSED_PARAM( Size );\r
        UNUSED_PARAM( lpErrno );\r
 \r
+       cl_spinlock_acquire( &g_ibsp.hca_mutex );\r
+       for( p_item = cl_qlist_head( &g_ibsp.hca_list );\r
+               p_item != cl_qlist_end( &g_ibsp.hca_list );\r
+               p_item = cl_qlist_next( p_item ) )\r
+       {\r
+               ibsp_hca_flush_mr_cache(\r
+                       PARENT_STRUCT( p_item, struct ibsp_hca, item ), lpvAddress, Size );\r
+       }\r
+       cl_spinlock_release( &g_ibsp.hca_mutex );\r
+\r
        IBSP_EXIT( IBSP_DBG_MEM );\r
        return 0;\r
 }\r
index 0091137..54424bb 100644 (file)
@@ -1130,7 +1130,7 @@ shutdown_and_destroy_socket_info(
        while( socket_info->send_cnt || socket_info->recv_cnt )\r
                ib_cq_comp( socket_info->cq_tinfo );\r
 \r
-       ib_deregister_all_mr( &socket_info->buf_mem_list );\r
+       ibsp_dereg_socket( socket_info );\r
 \r
        ib_destroy_socket( socket_info );\r
 \r
index 007e79b..3e13c7f 100644 (file)
 #include "ibspdll.h"\r
 \r
 \r
-/* Find the first registered mr that matches the given region. \r
- * mem_list is either socket_info->buf_mem_list or socket_info->rdma_mem_list.\r
- * mem_list_mutex must be taken.\r
- */\r
-static inline struct memory_node *\r
-__lookup_partial_mr(\r
-       IN                              struct mr_list                          *mem_list,\r
+__forceinline boolean_t\r
+__check_mr(\r
+       IN                              struct memory_reg                       *p_reg,\r
        IN                              ib_access_t                                     acl_mask,\r
        IN                              void                                            *start,\r
        IN                              size_t                                          len )\r
 {\r
-       cl_list_item_t *item;\r
-\r
-       for( item = cl_qlist_head( &mem_list->list );\r
-               item != cl_qlist_end( &mem_list->list );\r
-               item = cl_qlist_next( item ) )\r
-       {\r
-               struct memory_node *node = PARENT_STRUCT(item, struct memory_node, item);\r
-\r
-               if( (node->type.access_ctrl & acl_mask) == acl_mask &&\r
-                       start >= node->type.vaddr &&\r
-                       ((unsigned char *)start) + len <=\r
-                       ((unsigned char * __ptr64)node->type.vaddr) + node->type.length )\r
-               {\r
-                       return node;\r
-               }\r
-       }\r
-\r
-       return NULL;\r
+       return( (p_reg->type.access_ctrl & acl_mask) == acl_mask &&\r
+               start >= p_reg->type.vaddr &&\r
+               ((uintn_t)start) + len <=\r
+               ((uintn_t)(uint64_t)p_reg->type.vaddr) + p_reg->type.length );\r
 }\r
 \r
 \r
@@ -71,27 +53,36 @@ __lookup_partial_mr(
  */\r
 struct memory_node *\r
 lookup_partial_mr(\r
-       IN                              struct mr_list                          *mem_list,\r
+       IN                              struct ibsp_socket_info         *s,\r
        IN                              ib_access_t                                     acl_mask,\r
        IN                              void                                            *start,\r
        IN                              size_t                                          len )\r
 {\r
-       struct memory_node *node;\r
+       struct memory_node      *p_node;\r
+       cl_list_item_t          *p_item;\r
 \r
        IBSP_ENTER( IBSP_DBG_MEM );\r
 \r
-       cl_spinlock_acquire( &mem_list->mutex );\r
-\r
-       node = __lookup_partial_mr( mem_list, acl_mask, start, len );\r
+       cl_spinlock_acquire( &s->port->hca->rdma_mem_list.mutex );\r
 \r
-       cl_spinlock_release( &mem_list->mutex );\r
+       for( p_item = cl_qlist_head( &s->mr_list );\r
+               p_item != cl_qlist_end( &s->mr_list );\r
+               p_item = cl_qlist_next( p_item ) )\r
+       {\r
+               p_node = PARENT_STRUCT( p_item, struct memory_node, socket_item );\r
+               \r
+               if( __check_mr( p_node->p_reg, acl_mask, start, len ) )\r
+               {\r
+                       cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
+                       IBSP_EXIT( IBSP_DBG_MEM );\r
+                       return p_node;\r
+               }\r
+       }\r
 \r
-       if( node )\r
-               IBSP_EXIT( IBSP_DBG_MEM );\r
-       else\r
-               IBSP_TRACE_EXIT( IBSP_DBG_MEM, ("mr not found\n") );\r
+       cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
 \r
-       return node;\r
+       IBSP_TRACE_EXIT( IBSP_DBG_MEM, ("mr not found\n") );\r
+       return NULL;\r
 }\r
 \r
 \r
@@ -99,16 +90,18 @@ lookup_partial_mr(
  * mem_list is either socket_info->buf_mem_list or hca->rdma_mem_list.\r
  */\r
 struct memory_node *\r
-ib_register_memory(\r
-       IN                              struct mr_list                          *mem_list,\r
+ibsp_reg_mem(\r
+       IN                              struct ibsp_socket_info         *s,\r
        IN                              ib_pd_handle_t                          pd,\r
        IN                              void                                            *start,\r
        IN                              size_t                                          len,\r
        IN                              ib_access_t                                     access_ctrl,\r
                OUT                     LPINT                                           lpErrno )\r
 {\r
-       struct memory_node *node;\r
-       ib_api_status_t status;\r
+       struct memory_node      *p_node;\r
+       struct memory_reg       *p_reg;\r
+       cl_list_item_t          *p_item;\r
+       ib_api_status_t         status;\r
 \r
        IBSP_ENTER( IBSP_DBG_MEM );\r
 \r
@@ -117,55 +110,73 @@ ib_register_memory(
        CL_ASSERT( (access_ctrl & ~(IB_AC_RDMA_READ | IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE)) ==\r
                          0 );\r
 \r
-       /* First, try to find a suitable MR */\r
-       cl_spinlock_acquire( &mem_list->mutex );\r
+       /* Optimistically allocate a tracking structure. */\r
+       p_node = HeapAlloc( g_ibsp.heap, 0, sizeof(struct memory_node) );\r
+       if( !p_node )\r
+       {\r
+               IBSP_ERROR_EXIT(\r
+                       ("AllocateOverlappedBuf:HeapAlloc() failed: %d\n",\r
+                       GetLastError()) );\r
+               *lpErrno = WSAENOBUFS;\r
+               return NULL;\r
+       }\r
 \r
-       node = __lookup_partial_mr( mem_list, access_ctrl, start, len );\r
+       /* First, try to find a suitable MR */\r
+       cl_spinlock_acquire( &s->port->hca->rdma_mem_list.mutex );\r
 \r
-       if( node )\r
+       /* Find the first registered mr that matches the given region. */\r
+       for( p_item = cl_qlist_head( &s->port->hca->rdma_mem_list.list );\r
+               p_item != cl_qlist_end( &s->port->hca->rdma_mem_list.list );\r
+               p_item = cl_qlist_next( p_item ) )\r
        {\r
-               node->refcount++;\r
-               cl_spinlock_release( &mem_list->mutex );\r
-               IBSP_EXIT( IBSP_DBG_MEM );\r
-               return node;\r
-       }\r
+               p_reg = PARENT_STRUCT(p_item, struct memory_reg, item);\r
 \r
-       cl_spinlock_release( &mem_list->mutex );\r
+               if( __check_mr( p_reg, access_ctrl, start, len ) )\r
+               {\r
+                       p_node->p_reg = p_reg;\r
+                       p_node->s = s;\r
+                       cl_qlist_insert_tail( &p_reg->node_list, &p_node->mr_item );\r
+                       cl_qlist_insert_head(\r
+                               &s->mr_list, &p_node->socket_item );\r
+                       cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
+                       IBSP_EXIT( IBSP_DBG_MEM );\r
+                       return p_node;\r
+               }\r
+       }\r
 \r
        /* No corresponding MR has been found. Create a new one. */\r
-       node = HeapAlloc( g_ibsp.heap, 0, sizeof(struct memory_node) );\r
+       p_reg = HeapAlloc( g_ibsp.heap, 0, sizeof(struct memory_reg) );\r
 \r
-       if( node == NULL )\r
+       if( !p_reg )\r
        {\r
                IBSP_ERROR_EXIT(\r
                        ("AllocateOverlappedBuf:HeapAlloc() failed: %d\n",\r
                        GetLastError()) );\r
+               cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
+               HeapFree( g_ibsp.heap, 0, p_node );\r
                *lpErrno = WSAENOBUFS;\r
                return NULL;\r
        }\r
 \r
        /* The node is not initialized yet. All the parameters given are\r
         * supposed to be valid so we don't check them. */\r
-       node->refcount = 1;\r
-       node->type.vaddr = start;\r
-       node->type.length = len;\r
-       node->type.access_ctrl = access_ctrl;\r
+       cl_qlist_init( &p_reg->node_list );\r
+       p_reg->type.vaddr = start;\r
+       p_reg->type.length = len;\r
+       p_reg->type.access_ctrl = access_ctrl;\r
 \r
-       IBSP_TRACE2( IBSP_DBG_MEM, ("pinning memory node %p\n", node) );\r
-       status = ib_reg_mem( pd, &node->type, &node->lkey, &node->rkey, &node->mr_handle );\r
+       IBSP_TRACE2( IBSP_DBG_MEM, ("pinning memory node %p\n", p_node) );\r
+       status = ib_reg_mem(\r
+               pd, &p_reg->type, &p_reg->lkey, &p_reg->rkey, &p_reg->mr_handle );\r
 \r
        if( status )\r
        {\r
-#ifdef _DEBUG_\r
-               memset( node, 0x2e, sizeof(struct memory_node) );       /* Poison */\r
-#endif\r
-               if( HeapFree( g_ibsp.heap, 0, node ) == FALSE )\r
-               {\r
-                       IBSP_ERROR(\r
-                               ("BAD: HeapFree failed (%d)\n", GetLastError()) );\r
-               }\r
+               cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
+               HeapFree( g_ibsp.heap, 0, p_reg );\r
+               HeapFree( g_ibsp.heap, 0, p_node );\r
 \r
-               IBSP_ERROR_EXIT( ("ib_reg_mem failed (%d)\n", status) );\r
+               IBSP_ERROR_EXIT(\r
+                       ("ib_reg_mem returned %s\n", ib_get_err_str(status)) );\r
 \r
                *lpErrno = WSAEFAULT;\r
                return NULL;\r
@@ -173,123 +184,196 @@ ib_register_memory(
 \r
        STAT_INC( mr_num );\r
 \r
+       p_node->p_reg = p_reg;\r
+       p_node->s = s;\r
+\r
        /* Link to the list of nodes. */\r
-       cl_spinlock_acquire( &mem_list->mutex );\r
-       cl_qlist_insert_head( &mem_list->list, &node->item );\r
-       cl_spinlock_release( &mem_list->mutex );\r
+       cl_qlist_insert_head( &s->port->hca->rdma_mem_list.list, &p_reg->item );\r
+       cl_qlist_insert_head( &s->mr_list, &p_node->socket_item );\r
+       cl_qlist_insert_tail( &p_reg->node_list, &p_node->mr_item );\r
+       cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
 \r
        IBSP_EXIT( IBSP_DBG_MEM );\r
 \r
        *lpErrno = 0;\r
-       return node;\r
+       return p_node;\r
+}\r
+\r
+\r
+static inline int __ibsp_dereg_mem_mr(\r
+       IN                              struct memory_node                      *node )\r
+{\r
+       IBSP_ENTER( IBSP_DBG_MEM );\r
+\r
+       cl_qlist_remove_item( &node->p_reg->node_list, &node->mr_item );\r
+       cl_qlist_remove_item( &node->s->mr_list, &node->socket_item );\r
+\r
+       HeapFree( g_ibsp.heap, 0, node );\r
+\r
+       IBSP_EXIT( IBSP_DBG_MEM );\r
+       return 0;\r
 }\r
 \r
 \r
 /* Deregisters a memory region */\r
 int\r
-ib_deregister_memory(\r
-       IN                              struct mr_list                          *mem_list,\r
+ibsp_dereg_mem(\r
+       IN                              struct ibsp_socket_info         *s,\r
        IN                              struct memory_node                      *node,\r
                OUT                     LPINT                                           lpErrno )\r
 {\r
-#if 0\r
-       ib_api_status_t status;\r
-#endif\r
+       IBSP_ENTER( IBSP_DBG_MEM );\r
+\r
+       cl_spinlock_acquire( &s->port->hca->rdma_mem_list.mutex );\r
+       *lpErrno = __ibsp_dereg_mem_mr( node );\r
+       cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
+\r
+       IBSP_EXIT( IBSP_DBG_MEM );\r
+       return (*lpErrno? SOCKET_ERROR : 0);\r
+}\r
+\r
+\r
+/*\r
+ * Deregister the remaining memory regions on an HCA. This function should\r
+ * only be called before destroying the PD. In the normal case, the list should\r
+ * be empty because the switch should have done it.\r
+ */\r
+void\r
+ibsp_dereg_hca(\r
+       IN                              struct mr_list                          *mem_list )\r
+{\r
+       cl_list_item_t *item;\r
 \r
        IBSP_ENTER( IBSP_DBG_MEM );\r
 \r
        cl_spinlock_acquire( &mem_list->mutex );\r
+       IBSP_TRACE1( IBSP_DBG_MEM,\r
+               ("%d registrations.\n", cl_qlist_count( &mem_list->list )) );\r
 \r
-       if( node->refcount <= 0 )\r
-       {\r
-               cl_spinlock_release( &mem_list->mutex );\r
-               IBSP_ERROR(\r
-                       ("ib_deregister_memory: bad refcount %d)\n", node->refcount) );\r
-               *lpErrno = WSAEINVAL;\r
-               return SOCKET_ERROR;\r
-       }\r
-#if 0\r
-       if( --node->refcount == 0 )\r
+       for( item = cl_qlist_remove_head( &mem_list->list );\r
+               item != cl_qlist_end( &mem_list->list );\r
+               item = cl_qlist_remove_head( &mem_list->list ) )\r
        {\r
-               cl_qlist_remove_item( &mem_list->list, &node->item );\r
+               struct memory_reg *p_reg = PARENT_STRUCT(item, struct memory_reg, item);\r
+               ib_api_status_t status;\r
+\r
+               while( cl_qlist_count( &p_reg->node_list ) )\r
+               {\r
+                       struct memory_node *p_node =\r
+                               PARENT_STRUCT( cl_qlist_head( &p_reg->node_list ),\r
+                               struct memory_node, mr_item );\r
 \r
-               status = ib_dereg_mr( node->mr_handle );\r
+                       __ibsp_dereg_mem_mr( p_node );\r
+               }\r
+\r
+               IBSP_TRACE2( IBSP_DBG_MEM, ("unpinning ,memory reg %p\n", p_reg) );\r
+               status = ib_dereg_mr( p_reg->mr_handle );\r
                if( status )\r
                {\r
-                       IBSP_ERROR( ("ib_dereg_mr failed (%d)\n", status) );\r
+                       IBSP_ERROR(\r
+                               ("ib_dereg_mem returned %s\n", ib_get_err_str( status )) );\r
                }\r
                else\r
                {\r
                        STAT_DEC( mr_num );\r
                }\r
 \r
-#ifdef _DEBUG_\r
-               memset( node, 0x39, sizeof(struct memory_node) );\r
-#endif\r
-               if( HeapFree( g_ibsp.heap, 0, node ) == FALSE )\r
-               {\r
-                       IBSP_ERROR(\r
-                               ("BAD: HeapFree failed (%d)\n", GetLastError()) );\r
-               }\r
+               HeapFree( g_ibsp.heap, 0, p_reg );\r
        }\r
-#else\r
-       /* Never deregister. Cache it. */\r
-       --node->refcount;\r
-#endif\r
 \r
        cl_spinlock_release( &mem_list->mutex );\r
 \r
        IBSP_EXIT( IBSP_DBG_MEM );\r
-\r
-       *lpErrno = 0;\r
-       return 0;\r
 }\r
 \r
 \r
 /* Deregister the remaining memory regions. This function should only \r
- * be called before destroying the PD. In normal case, the list should \r
+ * be called when destroying the socket. In the normal case, the list should \r
  * be empty because the switch should have done it. */\r
 void\r
-ib_deregister_all_mr(\r
-       IN                              struct mr_list                          *mem_list )\r
+ibsp_dereg_socket(\r
+       IN                              struct ibsp_socket_info         *s )\r
 {\r
-       cl_list_item_t *item;\r
-\r
        IBSP_ENTER( IBSP_DBG_MEM );\r
 \r
-       cl_spinlock_acquire( &mem_list->mutex );\r
+       if( !s->port )\r
+       {\r
+               CL_ASSERT( !cl_qlist_count( &s->mr_list ) );\r
+               IBSP_EXIT( IBSP_DBG_MEM );\r
+               return;\r
+       }\r
+\r
+       cl_spinlock_acquire( &s->port->hca->rdma_mem_list.mutex );\r
        IBSP_TRACE1( IBSP_DBG_MEM,\r
-               ("%d registrations.\n", cl_qlist_count( &mem_list->list )) );\r
+               ("%d registrations.\n", cl_qlist_count( &s->mr_list )) );\r
 \r
-       while( (item = cl_qlist_remove_head( &mem_list->list )) != cl_qlist_end( &mem_list->list ) )\r
+       while( cl_qlist_count( &s->mr_list ) )\r
        {\r
-               struct memory_node *node = PARENT_STRUCT(item, struct memory_node, item);\r
-               ib_api_status_t status;\r
+               __ibsp_dereg_mem_mr( PARENT_STRUCT( cl_qlist_head( &s->mr_list ),\r
+                       struct memory_node, socket_item) );\r
+       }\r
 \r
-               IBSP_TRACE2( IBSP_DBG_MEM, ("unpinning ,memory node node %p\n", node) );\r
-               status = ib_dereg_mr( node->mr_handle );\r
-               if( status )\r
+       cl_spinlock_release( &s->port->hca->rdma_mem_list.mutex );\r
+\r
+       IBSP_EXIT( IBSP_DBG_MEM );\r
+}\r
+\r
+\r
+/*\r
+ * Loop through all the memory registrations on an HCA and release\r
+ * all that fall within the specified range.\r
+ */\r
+void\r
+ibsp_hca_flush_mr_cache(\r
+       IN                              struct ibsp_hca                         *p_hca,\r
+       IN                              LPVOID                                          lpvAddress,\r
+       IN                              SIZE_T                                          Size )\r
+{\r
+       struct memory_reg       *p_reg;\r
+       cl_list_item_t          *p_item;\r
+       ib_api_status_t         status;\r
+\r
+       IBSP_ENTER( IBSP_DBG_MEM );\r
+\r
+       cl_spinlock_acquire( &p_hca->rdma_mem_list.mutex );\r
+       for( p_item = cl_qlist_head( &p_hca->rdma_mem_list.list );\r
+               p_item != cl_qlist_end( &p_hca->rdma_mem_list.list );\r
+               p_item = cl_qlist_next( p_item ) )\r
+       {\r
+               p_reg = PARENT_STRUCT( p_item, struct memory_reg, item );\r
+\r
+               if( lpvAddress > p_reg->type.vaddr ||\r
+                       ((uintn_t)lpvAddress) + Size <\r
+                       ((uintn_t)(uint64_t)p_reg->type.vaddr) + p_reg->type.length )\r
                {\r
-                       IBSP_ERROR(\r
-                               ("ib_dereg_mem returned %s\n", ib_get_err_str( status )) );\r
+                       continue;\r
                }\r
-               else\r
+\r
+               /* Release all sockets' nodes that reference this registration. */\r
+               while( cl_qlist_count( &p_reg->node_list ) )\r
                {\r
-                       STAT_DEC( mr_num );\r
+                       struct memory_node      *p_node =\r
+                               PARENT_STRUCT( cl_qlist_head( &p_reg->node_list ),\r
+                               struct memory_node, mr_item );\r
+\r
+                       __ibsp_dereg_mem_mr( p_node );\r
                }\r
 \r
-#ifdef _DEBUG_\r
-               memset( node, 0x2d, sizeof(struct memory_node) );       /* Poison */\r
-#endif\r
+               /* Move to the previous item so the for loop properly moves forward. */\r
+               p_item = cl_qlist_prev( p_item );\r
+\r
+               cl_qlist_remove_item( &p_hca->rdma_mem_list.list, &p_reg->item );\r
 \r
-               if( HeapFree( g_ibsp.heap, 0, node ) == FALSE )\r
+               status = ib_dereg_mr( p_reg->mr_handle );\r
+               if( status != IB_SUCCESS )\r
                {\r
                        IBSP_ERROR(\r
-                               ("BAD: HeapFree failed (%d)\n", GetLastError()) );\r
+                               ("ib_dereg_mr returned %s\n", ib_get_err_str(status)) );\r
                }\r
-       }\r
 \r
-       cl_spinlock_release( &mem_list->mutex );\r
+               HeapFree( g_ibsp.heap, 0, p_reg );\r
+       }\r
+       cl_spinlock_release( &p_hca->rdma_mem_list.mutex );\r
 \r
        IBSP_EXIT( IBSP_DBG_MEM );\r
 }\r
index a968856..9b033e8 100644 (file)
@@ -185,7 +185,7 @@ pnp_ca_remove(
 \r
        if( hca->pd )\r
        {\r
-               ib_deregister_all_mr( &hca->rdma_mem_list );\r
+               ibsp_dereg_hca( &hca->rdma_mem_list );\r
 \r
                /*\r
                 * No need to wait for PD destruction - CA destruction will block\r
index 23a14d9..9aa2a04 100644 (file)
@@ -1551,7 +1551,7 @@ IBSPRecv(
        for( ds_idx = 0; ds_idx < dwBufferCount; ds_idx++ )\r
        {\r
                /* Get the memory region node */\r
-               node = lookup_partial_mr( &socket_info->buf_mem_list, IB_AC_LOCAL_WRITE,\r
+               node = lookup_partial_mr( socket_info, IB_AC_LOCAL_WRITE,\r
                        lpBuffers[ds_idx].buf, lpBuffers[ds_idx].len );\r
                if( !node )\r
                {\r
@@ -1568,7 +1568,7 @@ IBSPRecv(
                wr->ds_array[ds_idx].vaddr =\r
                        (uint64_t)(void* __ptr64)lpBuffers[ds_idx].buf;\r
                wr->ds_array[ds_idx].length = lpBuffers[ds_idx].len;\r
-               wr->ds_array[ds_idx].lkey = node->lkey;\r
+               wr->ds_array[ds_idx].lkey = node->p_reg->lkey;\r
        }\r
 \r
        /*\r
@@ -1787,7 +1787,7 @@ IBSPSend(
                for( ds_idx = 0; ds_idx < dwBufferCount; ds_idx++ )\r
                {\r
                        /* Get the memory region node */\r
-                       node = lookup_partial_mr( &socket_info->buf_mem_list, 0,        /* READ */\r
+                       node = lookup_partial_mr( socket_info, 0,       /* READ */\r
                                lpBuffers[ds_idx].buf, lpBuffers[ds_idx].len );\r
                        if( !node )\r
                        {\r
@@ -1801,7 +1801,7 @@ IBSPSend(
                                return SOCKET_ERROR;\r
                        }\r
 \r
-                       local_ds[ds_idx].lkey = node->lkey;\r
+                       local_ds[ds_idx].lkey = node->p_reg->lkey;\r
                }\r
        }\r
 \r
index a0ea72b..43a254f 100644 (file)
@@ -238,16 +238,18 @@ prepare_duplicate_socket(
        IN                              DWORD                                           dwProcessId );\r
 \r
 /* ibsp_mem.c */\r
+\r
+\r
 struct memory_node *\r
 lookup_partial_mr(\r
-       IN                              struct mr_list                          *mem_list,\r
+       IN                              struct ibsp_socket_info         *s,\r
        IN                              ib_access_t                                     acl_mask,\r
        IN                              void                                            *start,\r
        IN                              size_t                                          len );\r
 \r
 struct memory_node *\r
-ib_register_memory(\r
-       IN                              struct mr_list                          *mem_list,\r
+ibsp_reg_mem(\r
+       IN                              struct ibsp_socket_info         *s,\r
        IN                              ib_pd_handle_t                          pd,\r
        IN                              void                                            *start,\r
        IN                              size_t                                          len,\r
@@ -255,15 +257,25 @@ ib_register_memory(
                OUT                     LPINT                                           lpErrno );\r
 \r
 int\r
-ib_deregister_memory(\r
-       IN                              struct mr_list                          *mem_list,\r
+ibsp_dereg_mem(\r
+       IN                              struct ibsp_socket_info         *s,\r
        IN                              struct memory_node                      *node,\r
                OUT                     LPINT                                           lpErrno );\r
 \r
 void\r
-ib_deregister_all_mr(\r
+ibsp_dereg_hca(\r
        IN                              struct mr_list                          *mem_list );\r
 \r
+void\r
+ibsp_dereg_socket(\r
+       IN                              struct ibsp_socket_info         *s );\r
+\r
+void\r
+ibsp_hca_flush_mr_cache(\r
+       IN                              struct ibsp_hca                         *p_hca,\r
+       IN                              LPVOID                                          lpvAddress,\r
+       IN                              SIZE_T                                          Size );\r
+\r
 int\r
 ibsp_conn_insert(\r
        IN                              struct ibsp_socket_info         *socket_info );\r
index ceb4ce8..7936050 100644 (file)
@@ -147,30 +147,44 @@ struct disconnect_reason
        }       duplicating;\r
 };\r
 \r
+\r
 /* Internal node describing a registered region. */\r
-struct memory_node\r
+struct memory_reg\r
 {\r
-\r
-       cl_list_item_t item;\r
+       cl_list_item_t  item;\r
+       /*\r
+        * List count serves as reference count.  The memory registration\r
+        * can be released when the list is empty.\r
+        */\r
+       cl_qlist_t              node_list;\r
 \r
 #ifdef _DEBUG_\r
 #define MR_NODE_MAGIC 0x7fba43ce\r
        int magic;\r
 #endif\r
 \r
-       /* Everytime the same region is registered, this counter is\r
-        * increased. When it reaches 0, the memory can be released. */\r
-       int refcount;\r
-\r
        /* Characteristics of that region. */\r
-       ib_mr_create_t type;\r
+       ib_mr_create_t  type;\r
 \r
        /* Memory registration parameters, returned by ib_reg_mem. */\r
-       uint32_t lkey;\r
-       uint32_t rkey;\r
-       ib_mr_handle_t mr_handle;\r
+       uint32_t                lkey;\r
+       uint32_t                rkey;\r
+       ib_mr_handle_t  mr_handle;\r
 };\r
 \r
+\r
+struct memory_node\r
+{\r
+       /* List item to track within a socket structure. */\r
+       cl_list_item_t                  socket_item;\r
+       struct ibsp_socket_info *s;\r
+       /* List item to track within the registration structure. */\r
+       cl_list_item_t                  mr_item;\r
+       struct memory_reg               *p_reg;\r
+};\r
+\r
+\r
+\r
 /* Descriptor given back to WSPRegisterRdmaMemory */\r
 struct rdma_memory_desc\r
 {\r
@@ -288,14 +302,13 @@ struct ibsp_socket_info
        uint8_t                 dup_idx;\r
        atomic32_t              dup_cnt;\r
 \r
-       /* Memory management. \r
-        * From what I saw, the regions registered through IBSPRegisterMemory will \r
-        * only be used by this socket. And there will be only one registration for \r
-        * send buffers and one for receive buffers for the whole life of the socket. \r
-        * Thus these won't be cached. A later optimization would be not to release \r
-        * those regions, but to store them in the HCA structure in case another socket\r
-        * binds the same area again. */\r
-       struct mr_list buf_mem_list;\r
+       /*\r
+        * The switch will register local and RDMA memory for use in RDMA\r
+        * transfers.  All RDMA registrations are cached in the HCA structure,\r
+        * and have memory_node structures referencing them stored here in the\r
+        * socket structures.\r
+        */\r
+       cl_qlist_t              mr_list;\r
 \r
        /* Stuff for socket duplication */\r
        struct\r
index b73f8c8..b5af1ac 100644 (file)
@@ -61,18 +61,14 @@ create_socket_info(
        }\r
 \r
        cl_spinlock_construct( &socket_info->mutex );\r
-       cl_spinlock_construct( &socket_info->buf_mem_list.mutex );\r
        cl_spinlock_construct( &socket_info->send_lock );\r
        cl_spinlock_construct( &socket_info->recv_lock );\r
-       cl_qlist_init( &socket_info->buf_mem_list.list );\r
+       cl_qlist_init( &socket_info->mr_list );\r
        cl_qlist_init( &socket_info->listen.list );\r
 \r
        if( cl_spinlock_init( &socket_info->mutex ) != CL_SUCCESS )\r
                goto err;\r
 \r
-       if( cl_spinlock_init( &socket_info->buf_mem_list.mutex ) != CL_SUCCESS )\r
-               goto err;\r
-\r
        if( cl_spinlock_init( &socket_info->send_lock ) != CL_SUCCESS )\r
                goto err;\r
 \r
@@ -131,13 +127,9 @@ free_socket_info(
                ret = g_ibsp.up_call_table.lpWPUCloseSocketHandle(\r
                        p_socket->switch_socket, &error );\r
                if( ret == SOCKET_ERROR )\r
-               {\r
                        IBSP_ERROR( ("WPUCloseSocketHandle failed: %d\n", error) );\r
-               }\r
                else\r
-               {\r
                        STAT_DEC( wpusocket_num );\r
-               }\r
 \r
                p_socket->switch_socket = INVALID_SOCKET;\r
        }\r
@@ -145,7 +137,6 @@ free_socket_info(
        CL_ASSERT( !p_socket->qp );\r
        CL_ASSERT( !p_socket->conn_item.p_map );\r
 \r
-       cl_spinlock_destroy( &p_socket->buf_mem_list.mutex );\r
        cl_spinlock_destroy( &p_socket->mutex );\r
 \r
        cl_spinlock_destroy( &p_socket->send_lock );\r