ib/cm: poll for CM REQ events
author LOCALS~1\Temp/report.7.tmp <LOCALS~1\Temp/report.7.tmp@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 13 Jan 2010 19:34:17 +0000 (19:34 +0000)
committer LOCALS~1\Temp/report.7.tmp <LOCALS~1\Temp/report.7.tmp@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 13 Jan 2010 19:34:17 +0000 (19:34 +0000)
Replace the callback mechanism for reporting connection
requests with one that requires the user to poll for the
events.  This allows queuing REQs in the CM until the user
is ready to process the events.

A callback is still provided, but it now only notifies the user
that REQ events are ready to be retrieved; the request details
themselves must be fetched by polling.

This change improves the connection rate for winverbs when
the user retrieves only a small number of requests at a time.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@2663 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

core/al/kernel/al_cm.c
core/al/kernel/al_cm_cep.c
core/winverbs/kernel/wv_ep.c
inc/kernel/iba/ib_cm_ifc.h

index 177bb9e..8ba28f1 100644 (file)
@@ -98,37 +98,45 @@ cm_cep_handler(const ib_al_handle_t h_al, const net32_t cid)
 \r
 static void\r
 cm_listen_handler(const ib_al_handle_t h_al, const net32_t cid)\r
+{\r
+       iba_cm_id               *id;\r
+       iba_cm_event    event;\r
+\r
+       id = (iba_cm_id *) kal_cep_get_context(h_al, cid, NULL, NULL);\r
+       memset(&event, 0, sizeof event);\r
+       event.type = iba_cm_req_received;\r
+       id->callback(id, &event);\r
+}\r
+\r
+static NTSTATUS\r
+cm_get_request(iba_cm_id *p_listen_id, iba_cm_id **pp_id, iba_cm_event *p_event)\r
 {\r
        void                            *context;\r
        net32_t                         new_cid;\r
        ib_mad_element_t        *mad;\r
-       iba_cm_id                       *id, *listen_id;\r
-       iba_cm_event            event;\r
+       ib_api_status_t         ib_status;\r
        NTSTATUS                        status;\r
 \r
-       while (al_cep_poll(h_al, cid, &context, &new_cid, &mad) == IB_SUCCESS) {\r
-\r
-               listen_id = (iba_cm_id *) context;\r
+       ib_status = al_cep_poll(gh_al, p_listen_id->cid, &context, &new_cid, &mad);\r
+       if (ib_status != IB_SUCCESS) {\r
+               return STATUS_NO_MORE_ENTRIES;\r
+       }\r
 \r
-               id = cm_alloc_id(listen_id->callback, listen_id);\r
-               if (id == NULL) {\r
-                       kal_cep_destroy(h_al, new_cid, STATUS_NO_MORE_ENTRIES);\r
-                       ib_put_mad(mad);\r
-                       continue;\r
-               }\r
+       *pp_id = cm_alloc_id(p_listen_id->callback, p_listen_id);\r
+       if (*pp_id == NULL) {\r
+               kal_cep_destroy(gh_al, new_cid, STATUS_NO_MORE_ENTRIES);\r
+               status = STATUS_NO_MEMORY;\r
+               goto out;\r
+       }\r
 \r
-               kal_cep_config(h_al, new_cid, cm_cep_handler, id, cm_destroy_handler);\r
-               id->cid = new_cid;\r
+       kal_cep_config(gh_al, new_cid, cm_cep_handler, *pp_id, cm_destroy_handler);\r
+       (*pp_id)->cid = new_cid;\r
+       kal_cep_format_event(gh_al, new_cid, mad, p_event);\r
+       status = STATUS_SUCCESS;\r
 \r
-               kal_cep_format_event(h_al, id->cid, mad, &event);\r
-               status = id->callback(id, &event);\r
-               if (!NT_SUCCESS(status)) {\r
-                       kal_cep_config(h_al, new_cid, NULL, NULL, NULL);\r
-                       kal_cep_destroy(h_al, id->cid, status);\r
-                       cm_free_id(id);\r
-               }\r
-               ib_put_mad(mad);\r
-       }\r
+out:\r
+       ib_put_mad(mad);\r
+       return status;\r
 }\r
 \r
 static NTSTATUS\r
@@ -367,6 +375,7 @@ void cm_get_interface(iba_cm_interface *p_ifc)
        p_ifc->create_id = cm_create_id;\r
        p_ifc->destroy_id = cm_destroy_id;\r
        p_ifc->listen = cm_listen;\r
+       p_ifc->get_request = cm_get_request;\r
        p_ifc->send_req = cm_send_req;\r
        p_ifc->send_rep = cm_send_rep;\r
        p_ifc->send_rtu = cm_send_rtu;\r
index 86c5412..9a1d138 100644 (file)
@@ -5978,38 +5978,32 @@ __format_event_req(kcep_t *p_cep, mad_cm_req_t *p_mad, iba_cm_req_event *p_req)
        p_req->remote_ca_guid = p_cep->remote_ca_guid;\r
        p_req->pkey_index = p_cep->av[0].pkey_index;\r
        p_req->port_num = p_cep->av[0].attr.port_num;\r
-       p_req->req.service_id = p_mad->sid;\r
-\r
-       p_req->req.qpn = conn_req_get_lcl_qpn(p_mad);\r
-       p_req->req.qp_type = conn_req_get_qp_type(p_mad);\r
-       p_req->req.starting_psn = conn_req_get_starting_psn(p_mad);\r
-\r
-       p_req->req.p_pdata = p_mad->pdata;\r
-       p_req->req.pdata_len = IB_REQ_PDATA_SIZE;\r
-\r
-       p_req->req.max_cm_retries = conn_req_get_max_cm_retries(p_mad);\r
-       p_req->req.resp_res = conn_req_get_init_depth(p_mad);\r
-       p_req->req.init_depth = conn_req_get_resp_res(p_mad);\r
-       p_req->req.remote_resp_timeout = conn_req_get_resp_timeout(p_mad);\r
-       p_req->req.flow_ctrl = (uint8_t) conn_req_get_flow_ctrl(p_mad);\r
-       p_req->req.local_resp_timeout = conn_req_get_lcl_resp_timeout(p_mad);\r
-       p_req->req.rnr_retry_cnt = conn_req_get_rnr_retry_cnt(p_mad);\r
-       p_req->req.retry_cnt = conn_req_get_retry_cnt(p_mad);\r
-       p_req->req.srq = 0; // TODO: fix mad_cm_req_t\r
-\r
-       // We can re-use the MAD buffer if we're careful to read out the data\r
-       // that we need before it's overwritten.\r
-       p_req->req.p_primary_path = (ib_path_rec_t *) p_mad;\r
-       __format_path(p_req->req.p_primary_path, &p_mad->primary_path,\r
+       p_req->service_id = p_mad->sid;\r
+\r
+       p_req->qpn = conn_req_get_lcl_qpn(p_mad);\r
+       p_req->qp_type = conn_req_get_qp_type(p_mad);\r
+       p_req->starting_psn = conn_req_get_starting_psn(p_mad);\r
+\r
+       cl_memcpy(p_req->pdata, p_mad->pdata, IB_REQ_PDATA_SIZE);\r
+\r
+       p_req->max_cm_retries = conn_req_get_max_cm_retries(p_mad);\r
+       p_req->resp_res = conn_req_get_init_depth(p_mad);\r
+       p_req->init_depth = conn_req_get_resp_res(p_mad);\r
+       p_req->remote_resp_timeout = conn_req_get_resp_timeout(p_mad);\r
+       p_req->flow_ctrl = (uint8_t) conn_req_get_flow_ctrl(p_mad);\r
+       p_req->local_resp_timeout = conn_req_get_lcl_resp_timeout(p_mad);\r
+       p_req->rnr_retry_cnt = conn_req_get_rnr_retry_cnt(p_mad);\r
+       p_req->retry_cnt = conn_req_get_retry_cnt(p_mad);\r
+       p_req->srq = 0; // TODO: fix mad_cm_req_t\r
+\r
+       __format_path(&p_req->primary_path, &p_mad->primary_path,\r
                                  p_mad->pkey, conn_req_get_mtu(p_mad));\r
 \r
        if (p_mad->alternate_path.remote_lid != 0) {\r
-               p_req->req.p_alt_path = p_req->req.p_primary_path + 1;\r
-               __format_path(p_req->req.p_alt_path, &p_mad->alternate_path,\r
-                                         p_req->req.p_primary_path->pkey,\r
-                                         p_req->req.p_primary_path->mtu);\r
+               __format_path(&p_req->alt_path, &p_mad->alternate_path,\r
+                                         p_req->primary_path.pkey, p_req->primary_path.mtu);\r
        } else {\r
-               p_req->req.p_alt_path = NULL;\r
+               cl_memclr(&p_req->alt_path, sizeof p_req->alt_path);\r
        }\r
 }\r
 \r
@@ -6647,7 +6641,6 @@ kal_cep_get_context(
        AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("[ CID = %d\n", cid) );\r
 \r
        CL_ASSERT( h_al );\r
-       CL_ASSERT( pfn_addref );\r
 \r
        KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );\r
        p_cep = __lookup_cep( h_al, cid );\r
@@ -6658,7 +6651,7 @@ kal_cep_get_context(
                goto out;\r
        }\r
 \r
-       if( p_cep->pfn_cb != pfn_cb )\r
+       if( pfn_cb && p_cep->pfn_cb != pfn_cb )\r
        {\r
                AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,\r
                        ("CEP callback mismatch for cid %d, h_al %p\n", cid, h_al ));\r
@@ -6666,7 +6659,7 @@ kal_cep_get_context(
        }\r
 \r
        context = p_cep->context;\r
-       if( context != NULL )\r
+       if( pfn_addref && context != NULL )\r
        {\r
                pfn_addref( context );\r
        }\r
index 3d5c6ce..3ad91ac 100644 (file)
@@ -1110,60 +1110,75 @@ complete:
        WdfRequestComplete(Request, status);\r
 }\r
 \r
-static NTSTATUS WvEpIbListenHandler(iba_cm_id *pId, iba_cm_event *pEvent)\r
+static void WvEpGetIbRequest(WV_ENDPOINT *pListen)\r
 {\r
-       WV_ENDPOINT             *listen, *ep;\r
+       WV_ENDPOINT             *ep;\r
        WDFREQUEST              request;\r
        NTSTATUS                status;\r
        IB_CMA_HEADER   *hdr;\r
+       iba_cm_id               *id;\r
+       iba_cm_event    event;\r
 \r
-       listen = ((iba_cm_id *) pId->context)->context;\r
+       WdfObjectAcquireLock(pListen->Queue);\r
+       while (1) {\r
+               status = WdfIoQueueRetrieveNextRequest(pListen->Queue, &request);\r
+               if (!NT_SUCCESS(status)) {\r
+                       break;\r
+               }\r
 \r
-       WdfObjectAcquireLock(listen->Queue);\r
-       status = WdfIoQueueRetrieveNextRequest(listen->Queue, &request);\r
-       if (!NT_SUCCESS(status)) {\r
-               goto release;\r
+               status = IbCmInterface.CM.get_request(pListen->pIbCmId, &id, &event);\r
+               if (!NT_SUCCESS(status)) {\r
+                       WdfRequestRequeue(request);\r
+                       break;\r
+               }\r
+\r
+               ASSERT(!IsListEmpty(&pListen->Entry));\r
+               ep = CONTAINING_RECORD(RemoveHeadList(&pListen->Entry), WV_ENDPOINT, Entry);\r
+               ep->pIbCmId = id;\r
+               id->callback = WvEpIbCmHandler;\r
+               id->context = ep;\r
+\r
+               hdr = (IB_CMA_HEADER *) event.data.req.pdata;\r
+               if ((hdr->IpVersion >> 4) == 4) {\r
+                       ep->Attributes.LocalAddress.SockAddr.In.SinFamily = WV_AF_INET;\r
+                       ep->Attributes.LocalAddress.SockAddr.In.SinAddr = hdr->DstAddress.Ip4.Address;\r
+                       ep->Attributes.PeerAddress.SockAddr.In.SinFamily = WV_AF_INET;\r
+                       ep->Attributes.PeerAddress.SockAddr.In.SinAddr = hdr->SrcAddress.Ip4.Address;\r
+               } else {\r
+                       ep->Attributes.LocalAddress.SockAddr.In6.Sin6Family = WV_AF_INET6; \r
+                       RtlCopyMemory(ep->Attributes.LocalAddress.SockAddr.In6.Sin6Addr,\r
+                                                 hdr->DstAddress.Ip6Address, 16);\r
+                       ep->Attributes.PeerAddress.SockAddr.In6.Sin6Family = WV_AF_INET6;\r
+                       RtlCopyMemory(ep->Attributes.PeerAddress.SockAddr.In6.Sin6Addr,\r
+                                                 hdr->SrcAddress.Ip6Address, 16);\r
+               }\r
+               ep->Attributes.Device.DeviceGuid = event.data.req.local_ca_guid;\r
+               ep->Attributes.Device.Pkey = event.data.req.primary_path.pkey;\r
+               ep->Attributes.Device.PortNumber = event.data.req.port_num;\r
+               ep->Attributes.Param.Connect.ResponderResources = event.data.req.resp_res;\r
+               ep->Attributes.Param.Connect.InitiatorDepth = event.data.req.init_depth;\r
+               ep->Attributes.Param.Connect.RetryCount = event.data.req.retry_cnt;\r
+               ep->Attributes.Param.Connect.RnrRetryCount = event.data.req.rnr_retry_cnt;\r
+               ep->Attributes.Param.Connect.DataLength = sizeof(ep->Attributes.Param.Connect.Data);\r
+               RtlCopyMemory(ep->Attributes.Param.Connect.Data, hdr + 1,\r
+                                         sizeof(ep->Attributes.Param.Connect.Data));\r
+               ep->Route = event.data.req.primary_path;\r
+\r
+               ep->State = WvEpPassiveConnect;\r
+               WvEpPut(ep);\r
+\r
+               WdfRequestComplete(request, STATUS_SUCCESS);\r
        }\r
+       WdfObjectReleaseLock(pListen->Queue);\r
+}\r
 \r
-       ASSERT(!IsListEmpty(&listen->Entry));\r
-       ep = CONTAINING_RECORD(RemoveHeadList(&listen->Entry), WV_ENDPOINT, Entry);\r
-       ep->pIbCmId = pId;\r
-       pId->callback = WvEpIbCmHandler;\r
-       pId->context = ep;\r
+static NTSTATUS WvEpIbListenHandler(iba_cm_id *pId, iba_cm_event *pEvent)\r
+{\r
+       WV_ENDPOINT             *listen;\r
 \r
-       hdr = pEvent->data.req.req.p_pdata;\r
-       if ((hdr->IpVersion >> 4) == 4) {\r
-               ep->Attributes.LocalAddress.SockAddr.In.SinFamily = WV_AF_INET;\r
-               ep->Attributes.LocalAddress.SockAddr.In.SinAddr = hdr->DstAddress.Ip4.Address;\r
-               ep->Attributes.PeerAddress.SockAddr.In.SinFamily = WV_AF_INET;\r
-               ep->Attributes.PeerAddress.SockAddr.In.SinAddr = hdr->SrcAddress.Ip4.Address;\r
-       } else {\r
-               ep->Attributes.LocalAddress.SockAddr.In6.Sin6Family = WV_AF_INET6; \r
-               RtlCopyMemory(ep->Attributes.LocalAddress.SockAddr.In6.Sin6Addr,\r
-                                         hdr->DstAddress.Ip6Address, 16);\r
-               ep->Attributes.PeerAddress.SockAddr.In6.Sin6Family = WV_AF_INET6;\r
-               RtlCopyMemory(ep->Attributes.PeerAddress.SockAddr.In6.Sin6Addr,\r
-                                         hdr->SrcAddress.Ip6Address, 16);\r
-       }\r
-       ep->Attributes.Device.DeviceGuid = pEvent->data.req.local_ca_guid;\r
-       ep->Attributes.Device.Pkey = pEvent->data.req.req.p_primary_path->pkey;\r
-       ep->Attributes.Device.PortNumber = pEvent->data.req.port_num;\r
-       ep->Attributes.Param.Connect.ResponderResources = pEvent->data.req.req.resp_res;\r
-       ep->Attributes.Param.Connect.InitiatorDepth = pEvent->data.req.req.init_depth;\r
-       ep->Attributes.Param.Connect.RetryCount = pEvent->data.req.req.retry_cnt;\r
-       ep->Attributes.Param.Connect.RnrRetryCount = pEvent->data.req.req.rnr_retry_cnt;\r
-       ep->Attributes.Param.Connect.DataLength = sizeof(ep->Attributes.Param.Connect.Data);\r
-       RtlCopyMemory(ep->Attributes.Param.Connect.Data, hdr + 1,\r
-                                 sizeof(ep->Attributes.Param.Connect.Data));\r
-       ep->Route = *pEvent->data.req.req.p_primary_path;\r
-\r
-       ep->State = WvEpPassiveConnect;\r
-       WvEpPut(ep);\r
-\r
-       WdfRequestComplete(request, STATUS_SUCCESS);\r
-release:\r
-       WdfObjectReleaseLock(listen->Queue);\r
-       return status;\r
+       listen = pId->context;\r
+       WvEpGetIbRequest(listen);\r
+       return STATUS_SUCCESS;\r
 }\r
 \r
 void WvEpListen(WV_PROVIDER *pProvider, WDFREQUEST Request)\r
@@ -1235,24 +1250,24 @@ void WvEpGetRequest(WV_PROVIDER *pProvider, WDFREQUEST Request)
        status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_GET_REQUEST),\r
                                                                                   &req, NULL);\r
        if (!NT_SUCCESS(status)) {\r
-               goto complete;\r
+               goto err1;\r
        }\r
 \r
        listen = WvEpAcquire(pProvider, req->Id);\r
        if (listen == NULL) {\r
                status = STATUS_NOT_FOUND;\r
-               goto complete;\r
+               goto err1;\r
        }\r
 \r
        if (listen->State != WvEpListening) {\r
                status = STATUS_NOT_SUPPORTED;\r
-               goto release1;\r
+               goto err2;\r
        }\r
 \r
        ep = WvEpAcquire(pProvider, req->EpId);\r
        if (ep == NULL) {\r
                status = STATUS_NOT_FOUND;\r
-               goto release1;\r
+               goto err2;\r
        }\r
 \r
        WdfObjectAcquireLock(ep->Queue);\r
@@ -1262,9 +1277,8 @@ void WvEpGetRequest(WV_PROVIDER *pProvider, WDFREQUEST Request)
                status = STATUS_CONNECTION_IN_USE;\r
        }\r
        WdfObjectReleaseLock(ep->Queue);\r
-\r
        if (!NT_SUCCESS(status)) {\r
-               goto release2;\r
+               goto err3;\r
        }\r
 \r
        WdfObjectAcquireLock(listen->Queue);\r
@@ -1274,15 +1288,21 @@ void WvEpGetRequest(WV_PROVIDER *pProvider, WDFREQUEST Request)
                WvEpGet(ep);\r
        }\r
        WdfObjectReleaseLock(listen->Queue);\r
+       if (!NT_SUCCESS(status)) {\r
+               goto err3;\r
+       }\r
 \r
-release2:\r
        WvEpRelease(ep);\r
-release1:\r
+       WvEpGetIbRequest(listen);\r
        WvEpRelease(listen);\r
-complete:\r
-       if (!NT_SUCCESS(status)) {\r
-               WdfRequestComplete(Request, status);\r
-       }\r
+       return;\r
+\r
+err3:\r
+       WvEpRelease(ep);\r
+err2:\r
+       WvEpRelease(listen);\r
+err1:\r
+       WdfRequestComplete(Request, status);\r
 }\r
 \r
 void WvEpLookup(WV_PROVIDER *pProvider, WDFREQUEST Request)\r
index 0949482..e895406 100644 (file)
@@ -73,7 +73,27 @@ typedef struct _iba_cm_req
 \r
 typedef struct _iba_cm_req_event\r
 {\r
-       iba_cm_req                                      req;\r
+       ib_net64_t                                      service_id;\r
+\r
+       ib_path_rec_t                           primary_path;\r
+       ib_path_rec_t                           alt_path;\r
+\r
+       net32_t                                         qpn;\r
+       ib_qp_type_t                            qp_type;\r
+       net32_t                                         starting_psn;\r
+\r
+       uint8_t                                         pdata[IB_REQ_PDATA_SIZE];\r
+\r
+       uint8_t                                         max_cm_retries;\r
+       uint8_t                                         resp_res;\r
+       uint8_t                                         init_depth;\r
+       uint8_t                                         remote_resp_timeout;\r
+       uint8_t                                         flow_ctrl;\r
+       uint8_t                                         local_resp_timeout;\r
+       uint8_t                                         rnr_retry_cnt;\r
+       uint8_t                                         retry_cnt;\r
+       uint8_t                                         srq;\r
+\r
        net64_t                                         local_ca_guid;\r
        net64_t                                         remote_ca_guid;\r
        uint16_t                                        pkey_index;\r
@@ -239,6 +259,8 @@ typedef struct _iba_cm_interface
 \r
        NTSTATUS                (*listen)(iba_cm_id *p_id, net64_t service_id, void *p_compare_buf,\r
                                                          uint8_t compare_len, uint8_t compare_offset);\r
+       NTSTATUS                (*get_request)(iba_cm_id *p_listen_id, iba_cm_id **pp_id,\r
+                                                                  iba_cm_event *p_event);\r
 \r
        NTSTATUS                (*send_req)(iba_cm_id *p_id, iba_cm_req *p_req);\r
        NTSTATUS                (*send_rep)(iba_cm_id *p_id, iba_cm_rep *p_rep);\r
@@ -282,9 +304,9 @@ static inline UINT8 IbaCmVersionMinor(USHORT Version)
        return (UINT8) Version;\r
 }\r
 \r
-// {EACC1466-BB2D-4478-B5BE-40EDF7EE08AB}\r
-DEFINE_GUID(GUID_INFINIBAND_INTERFACE_CM, 0xeacc1466, 0xbb2d, 0x4478,\r
-                       0xb5, 0xbe, 0x40, 0xed, 0xf7, 0xee, 0x8, 0xab);\r
+// {6A11D060-8957-49e6-BE2A-01EDF1BD22B3}\r
+DEFINE_GUID(GUID_INFINIBAND_INTERFACE_CM, 0x6a11d060, 0x8957, 0x49e6,\r
+                       0xbe, 0x2a, 0x1, 0xed, 0xf1, 0xbd, 0x22, 0xb3);\r
 \r
 typedef struct _INFINIBAND_INTERFACE_CM\r
 {\r