winverbs: fix crash accessing freed memory from async thread
authorshefty <shefty@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 23 Sep 2009 21:46:19 +0000 (21:46 +0000)
committershefty <shefty@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 23 Sep 2009 21:46:19 +0000 (21:46 +0000)
If an application exits while asynchronous accept processing is queued,
it's possible for the async processing to access the IbCmId after it has
been freed.  A similar problem to this was fixed that dealt with accessing
the verbs QP handle.

A simpler, more generic solution to this problem is to handle application
exit in the same manner as device removal, and lock the winverb provider
lookup lists with exclusive access.  Asynchronous operations that are in
process will run to completion, and future operations will be blocked until
the provider cleanup thread has completed.  Once they run, they will fail
to acquire a reference on the desired object, which should result in a
graceful failure.

This avoids more complicated locking to use handles belonging to the lower
level code.  If a reference on an object can be acquired, the handle will
be available for use until the reference is released.  To handle IB CM
callbacks, additional state checking is required to avoid processing
CM events when we're trying to destroy the endpoint.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@2462 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

core/winverbs/kernel/wv_ep.c
core/winverbs/kernel/wv_provider.c
core/winverbs/kernel/wv_qp.c

index 9a3350c..2fbb5b3 100644 (file)
@@ -57,7 +57,7 @@ WV_ENDPOINT *WvEpAcquire(WV_PROVIDER *pProvider, UINT64 Id)
        KeAcquireGuardedMutex(&pProvider->Lock);\r
        WvProviderDisableRemove(pProvider);\r
        ep = IndexListAt(&pProvider->EpIndex, (SIZE_T) Id);\r
-       if (ep != NULL) {\r
+       if (ep != NULL && ep->State != WvEpDestroying) {\r
                WvEpGet(ep);\r
        } else {\r
                ep = NULL;\r
@@ -185,14 +185,14 @@ void WvEpFree(WV_ENDPOINT *pEndpoint)
        pEndpoint->State = WvEpDestroying;\r
        WdfObjectReleaseLock(pEndpoint->Queue);\r
 \r
-       if (pEndpoint->pIbCmId != NULL) {\r
-               IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId);\r
-       }\r
-\r
        if (InterlockedDecrement(&pEndpoint->Ref) > 0) {\r
                KeWaitForSingleObject(&pEndpoint->Event, Executive, KernelMode, FALSE, NULL);\r
        }\r
 \r
+       if (pEndpoint->pIbCmId != NULL) {\r
+               IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId);\r
+       }\r
+\r
        WdfIoQueuePurgeSynchronously(pEndpoint->Queue);\r
        WdfObjectDelete(pEndpoint->Queue);\r
        ExFreePoolWithTag(pEndpoint, 'pevw');\r
@@ -373,10 +373,6 @@ static void WvEpSaveReject(WV_ENDPOINT *pEndpoint, iba_cm_rej_event *pReject)
        pEndpoint->Attributes.Param.Connect.DataLength = len;\r
 }\r
 \r
-/*\r
- * The QP transition to error may be done from an async worker thread.\r
- * Synchronize against application exit.\r
- */\r
 static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,\r
                                                                UINT8 *pVerbsData, UINT32 VerbsSize)\r
 {\r
@@ -384,12 +380,6 @@ static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,
        ib_api_status_t         ib_status;\r
        NTSTATUS                        status;\r
 \r
-       KeAcquireGuardedMutex(&pQp->Lock);\r
-       if (pQp->hVerbsQp == NULL) {\r
-               status = STATUS_NOT_FOUND;\r
-               goto out;\r
-       }\r
-\r
        attr.req_state = IB_QPS_ERROR;\r
        ib_status = pQp->pVerbs->ndi_modify_qp(pQp->hVerbsQp, &attr, NULL,\r
                                                                                   VerbsSize, pVerbsData);\r
@@ -399,8 +389,6 @@ static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,
                status = STATUS_UNSUCCESSFUL;\r
        }\r
 \r
-out:\r
-       KeReleaseGuardedMutex(&pQp->Lock);\r
        return status;\r
 }\r
 \r
@@ -505,10 +493,19 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent)
        ep = pId->context;\r
        switch (pEvent->type) {\r
        case iba_cm_req_error:\r
+               WdfObjectAcquireLock(ep->Queue);\r
+               if (ep->State == WvEpActiveConnect) {\r
+                       ep->State = WvEpDisconnected;\r
+                       WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);\r
+               }\r
+               WdfObjectReleaseLock(ep->Queue);\r
+               break;\r
        case iba_cm_rep_error:\r
                WdfObjectAcquireLock(ep->Queue);\r
-               ep->State = WvEpDisconnected;\r
-               WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);\r
+               if (ep->State == WvEpPassiveConnect) {\r
+                       ep->State = WvEpDisconnected;\r
+                       WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);\r
+               }\r
                WdfObjectReleaseLock(ep->Queue);\r
                break;\r
        case iba_cm_dreq_error:\r
@@ -557,8 +554,10 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent)
                break;\r
        default:\r
                WdfObjectAcquireLock(ep->Queue);\r
-               ep->State = WvEpDisconnected;\r
-               WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED);\r
+               if (ep->State != WvEpDestroying) {\r
+                       ep->State = WvEpDisconnected;\r
+                       WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED);\r
+               }\r
                WdfObjectReleaseLock(ep->Queue);\r
                break;\r
        }\r
index 2998afe..d8b83f9 100644 (file)
@@ -53,6 +53,36 @@ void WvProviderPut(WV_PROVIDER *pProvider)
        }\r
 }\r
 \r
+// See comment above WvProviderRemoveHandler.\r
+static void WvProviderLockRemove(WV_PROVIDER *pProvider)\r
+{\r
+       KeAcquireGuardedMutex(&pProvider->Lock);\r
+       pProvider->Exclusive++;\r
+       KeClearEvent(&pProvider->SharedEvent);\r
+       while (pProvider->Active > 0) {\r
+               KeReleaseGuardedMutex(&pProvider->Lock);\r
+               KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive, KernelMode,\r
+                                                         FALSE, NULL);\r
+               KeAcquireGuardedMutex(&pProvider->Lock);\r
+       }\r
+       pProvider->Active++;\r
+       KeReleaseGuardedMutex(&pProvider->Lock);\r
+}\r
+\r
+// See comment above WvProviderRemoveHandler.\r
+static void WvProviderUnlockRemove(WV_PROVIDER *pProvider)\r
+{\r
+       KeAcquireGuardedMutex(&pProvider->Lock);\r
+       pProvider->Exclusive--;\r
+       pProvider->Active--;\r
+       if (pProvider->Exclusive > 0) {\r
+               KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE);\r
+       } else if (pProvider->Pending > 0) {\r
+               KeSetEvent(&pProvider->SharedEvent, 0, FALSE);\r
+       }\r
+       KeReleaseGuardedMutex(&pProvider->Lock);\r
+}\r
+\r
 NTSTATUS WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider)\r
 {\r
        NTSTATUS status;\r
@@ -83,6 +113,16 @@ NTSTATUS WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider)
        return STATUS_SUCCESS;\r
 }\r
 \r
+/*\r
+ * We could be processing asynchronous requests or have them queued\r
+ * when cleaning up.  To synchronize with async request processing,\r
+ * acquire the provider lock with exclusive access until we're done\r
+ * destroying all resoureces.  This will allow active requests to\r
+ * complete their processing, but prevent queued requests from\r
+ * running until cleanup is done.  At that point, queue requests will\r
+ * be unable to acquire any winverbs resources.\r
+ */\r
+\r
 void WvProviderCleanup(WV_PROVIDER *pProvider)\r
 {\r
        WV_DEVICE                               *dev;\r
@@ -95,6 +135,7 @@ void WvProviderCleanup(WV_PROVIDER *pProvider)
        WV_ENDPOINT                             *ep;\r
        SIZE_T                                  i;\r
 \r
+       WvProviderLockRemove(pProvider);\r
        IndexListForEach(&pProvider->EpIndex, i) {\r
                ep = (WV_ENDPOINT *) IndexListAt(&pProvider->EpIndex, i);\r
                if (ep->State == WvEpListening) {\r
@@ -131,6 +172,7 @@ void WvProviderCleanup(WV_PROVIDER *pProvider)
        while ((dev = IndexListRemoveHead(&pProvider->DevIndex)) != NULL) {\r
                WvDeviceFree(dev);\r
        }\r
+       WvProviderUnlockRemove(pProvider);\r
 \r
        if (InterlockedDecrement(&pProvider->Ref) > 0) {\r
                KeWaitForSingleObject(&pProvider->Event, Executive, KernelMode, FALSE, NULL);\r
@@ -147,36 +189,6 @@ void WvProviderCleanup(WV_PROVIDER *pProvider)
        WorkQueueDestroy(&pProvider->WorkQueue);\r
 }\r
 \r
-// See comment above WvProviderRemoveHandler.\r
-static void WvProviderLockRemove(WV_PROVIDER *pProvider)\r
-{\r
-       KeAcquireGuardedMutex(&pProvider->Lock);\r
-       pProvider->Exclusive++;\r
-       KeClearEvent(&pProvider->SharedEvent);\r
-       while (pProvider->Active > 0) {\r
-               KeReleaseGuardedMutex(&pProvider->Lock);\r
-               KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive, KernelMode,\r
-                                                         FALSE, NULL);\r
-               KeAcquireGuardedMutex(&pProvider->Lock);\r
-       }\r
-       pProvider->Active++;\r
-       KeReleaseGuardedMutex(&pProvider->Lock);\r
-}\r
-\r
-// See comment above WvProviderRemoveHandler.\r
-static void WvProviderUnlockRemove(WV_PROVIDER *pProvider)\r
-{\r
-       KeAcquireGuardedMutex(&pProvider->Lock);\r
-       pProvider->Exclusive--;\r
-       pProvider->Active--;\r
-       if (pProvider->Exclusive > 0) {\r
-               KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE);\r
-       } else if (pProvider->Pending > 0) {\r
-               KeSetEvent(&pProvider->SharedEvent, 0, FALSE);\r
-       }\r
-       KeReleaseGuardedMutex(&pProvider->Lock);\r
-}\r
-\r
 /*\r
  * Must hold pProvider->Lock.  Function may release and re-acquire.\r
  * See comment above WvProviderRemoveHandler.\r
index 3f64bd5..8ad87e5 100644 (file)
@@ -519,20 +519,10 @@ out:
        WdfRequestComplete(Request, status);\r
 }\r
 \r
-/*\r
- * Synchronize freeing the QP due to application exit with asynchronous\r
- * disconnect processing transitioning the QP into the error state.\r
- */\r
 void WvQpFree(WV_QUEUE_PAIR *pQp)\r
 {\r
        WV_MULTICAST            *mc;\r
        LIST_ENTRY                      *entry;\r
-       ib_qp_handle_t          hqp;\r
-\r
-       KeAcquireGuardedMutex(&pQp->Lock);\r
-       hqp = pQp->hVerbsQp;\r
-       pQp->hVerbsQp = NULL;\r
-       KeReleaseGuardedMutex(&pQp->Lock);\r
 \r
        while ((entry = RemoveHeadList(&pQp->McList)) != &pQp->McList) {\r
                mc = CONTAINING_RECORD(entry, WV_MULTICAST, Entry);\r
@@ -546,8 +536,8 @@ void WvQpFree(WV_QUEUE_PAIR *pQp)
                KeWaitForSingleObject(&pQp->Event, Executive, KernelMode, FALSE, NULL);\r
        }\r
 \r
-       if (hqp != NULL) {\r
-               pQp->pVerbs->destroy_qp(hqp, 0);\r
+       if (pQp->hVerbsQp != NULL) {\r
+               pQp->pVerbs->destroy_qp(pQp->hVerbsQp, 0);\r
        }\r
 \r
        if (pQp->pSrq != NULL) {\r