/*
 * [WSD] Add IBWSD_POLL environment variable to allow provider to poll for
 * completions.  (mirror/winof: ulp/wsd/user/ibsp_iblow.c)
 */
1 /*\r
2  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.\r
3  *\r
4  * This software is available to you under the OpenIB.org BSD license\r
5  * below:\r
6  *\r
7  *     Redistribution and use in source and binary forms, with or\r
8  *     without modification, are permitted provided that the following\r
9  *     conditions are met:\r
10  *\r
11  *      - Redistributions of source code must retain the above\r
12  *        copyright notice, this list of conditions and the following\r
13  *        disclaimer.\r
14  *\r
15  *      - Redistributions in binary form must reproduce the above\r
16  *        copyright notice, this list of conditions and the following\r
17  *        disclaimer in the documentation and/or other materials\r
18  *        provided with the distribution.\r
19  *\r
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
23  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
24  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
25  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
26  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
27  * SOFTWARE.\r
28  *\r
29  * $Id$\r
30  */\r
31 \r
32 #include "ibspdll.h"\r
33 \r
34 \r
/* Per-completion context gathered by complete_wq() while the CQ is being
 * polled.  ib_cq_comp() later walks an array of these to invoke
 * lpWPUCompleteOverlappedRequest outside the poll loop. */
typedef struct _io_comp_info
{
	struct ibsp_socket_info	*p_socket;	/* socket that owns the completed work request */
	LPWSAOVERLAPPED			p_ov;		/* overlapped to hand to the switch; NULL when the
										 * switch will poll via WSPGetOverlappedResult() */

} io_comp_info_t;
41 \r
42 \r
43 /* Work queue entry completion routine. */\r
44 static void\r
45 complete_wq(\r
46         IN              const   ib_wc_t                                         *wc,\r
47                 OUT                     io_comp_info_t                          *p_io_info )\r
48 {\r
49         struct _wr                              *wr = NULL;\r
50         struct _recv_wr                 *p_recv_wr = NULL;\r
51         LPWSAOVERLAPPED                 lpOverlapped = NULL;\r
52         struct ibsp_socket_info *socket_info = NULL;\r
53 \r
54         IBSP_ENTER( IBSP_DBG_IO );\r
55 \r
56         wr = (struct _wr * __ptr64)wc->wr_id;\r
57         p_recv_wr = (struct _recv_wr * __ptr64)wc->wr_id;\r
58 \r
59         CL_ASSERT( wr );\r
60 \r
61         socket_info = wr->socket_info;\r
62         p_io_info->p_socket = socket_info;\r
63 \r
64         lpOverlapped = wr->lpOverlapped;\r
65 \r
66         IBSP_TRACE4( IBSP_DBG_IO,\r
67                 ("socket %p, ov %p, work completion status=%s, wc_type=%s\n",\r
68                 socket_info, lpOverlapped, ib_get_wc_status_str( wc->status ),\r
69                 ib_get_wc_type_str( wc->wc_type )) );\r
70 \r
71         /* Set the windows error code. It's not easy to find an easy\r
72          * correspondence between the IBAL error codes and windows error\r
73          * codes; but it probably does not matter, as long as it returns an\r
74          * error. */\r
75         switch( wc->status )\r
76         {\r
77         case IB_WCS_SUCCESS:\r
78                 /*\r
79                  * Set the length of the operation. Under Infiniband, the work\r
80                  * completion length is only valid for a receive\r
81                  * operation. Fortunately we had already set the length during the\r
82                  * send operation. \r
83                  *\r
84                  * lpWPUCompleteOverlappedRequest is supposed to store the length\r
85                  * into InternalHigh, however it will not be called if the low\r
86                  * order bit of lpOverlapped->hEvent is set. So we do it and hope\r
87                  * for the best. \r
88                  *\r
89                  * NOTE: Without a valid length, the switch doesn't seem to call \r
90                  * GetOverlappedResult() even if we call lpWPUCompleteOverlappedRequest()\r
91                  */\r
92                 if( wc->wc_type == IB_WC_RECV )\r
93                         lpOverlapped->InternalHigh = wc->length;\r
94 \r
95                 lpOverlapped->OffsetHigh = 0;\r
96                 break;\r
97 \r
98         case IB_WCS_WR_FLUSHED_ERR:\r
99                 cl_spinlock_acquire( &socket_info->mutex );\r
100 \r
101                 if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE &&\r
102                         wc->wc_type == IB_WC_RECV )\r
103                 {\r
104                         /*\r
105                          * Take the wr off the wr_list, and place onto the\r
106                          * dup_wr_list.  We will post them later on the new QP. \r
107                          */\r
108                         cl_spinlock_acquire( &socket_info->recv_lock );\r
109 \r
110                         /* Copy to the duplicate WR array. */\r
111                         socket_info->dup_wr[socket_info->dup_idx] = *p_recv_wr;\r
112 \r
113 #if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \\r
114         QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \\r
115         QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8\r
116                         socket_info->dup_idx++;\r
117                         socket_info->dup_idx &= (QP_ATTRIB_RQ_DEPTH - 1);\r
118 #else\r
119                         if( ++socket_info->dup_idx == QP_ATTRIB_RQ_DEPTH )\r
120                                 socket_info->dup_idx = 0;\r
121 #endif\r
122 \r
123                         cl_atomic_inc( &socket_info->dup_cnt );\r
124                         /* ib_cq_comp will decrement the receive count. */\r
125                         cl_atomic_dec( &socket_info->recv_cnt );\r
126 \r
127                         cl_spinlock_release( &socket_info->recv_lock );\r
128 \r
129                         cl_spinlock_release( &socket_info->mutex );\r
130                         IBSP_EXIT( IBSP_DBG_IO );\r
131                         return;\r
132                 }\r
133                 \r
134                 /* Check for flushing the receive buffers on purpose. */\r
135                 if( socket_info->socket_state == IBSP_DUPLICATING_OLD )\r
136                         wr->lpOverlapped->OffsetHigh = 0;\r
137                 else\r
138                         wr->lpOverlapped->OffsetHigh = WSA_OPERATION_ABORTED;\r
139 \r
140                 cl_spinlock_release( &socket_info->mutex );\r
141 \r
142                 /* Override the length, as per the WSD specs. */\r
143                 wr->lpOverlapped->InternalHigh = 0;\r
144                 break;\r
145 \r
146         case IB_WCS_LOCAL_LEN_ERR:\r
147         case IB_WCS_LOCAL_OP_ERR:\r
148         case IB_WCS_LOCAL_PROTECTION_ERR:\r
149         case IB_WCS_MEM_WINDOW_BIND_ERR:\r
150         case IB_WCS_REM_ACCESS_ERR:\r
151         case IB_WCS_REM_OP_ERR:\r
152         case IB_WCS_RNR_RETRY_ERR:\r
153         case IB_WCS_TIMEOUT_RETRY_ERR:\r
154         case IB_WCS_REM_INVALID_REQ_ERR:\r
155         default:\r
156                 IBSP_ERROR( ("%s error: %s\n",\r
157                         ib_get_wc_type_str( wc->wc_type ),\r
158                         ib_get_wc_status_str( wc->status )) );\r
159                 lpOverlapped->OffsetHigh = WSAECONNABORTED;\r
160                 wr->lpOverlapped->InternalHigh = 0;\r
161                 socket_info->qp_error = WSAECONNABORTED;\r
162                 break;\r
163         }\r
164 \r
165 #ifdef _DEBUG_\r
166         if( wc->wc_type == IB_WC_RECV )\r
167         {\r
168                 // This code requires the recv count to be decremented here, but it needs\r
169                 // to be decremented after any callbacks are invoked so socket destruction\r
170                 // gets delayed until all callbacks have been invoked.\r
171                 //{\r
172                 //      uint8_t idx;\r
173 \r
174                 //      cl_spinlock_acquire( &socket_info->recv_lock );\r
175                 //      idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;\r
176                 //      if( idx >= QP_ATTRIB_RQ_DEPTH )\r
177                 //              idx += QP_ATTRIB_RQ_DEPTH;\r
178 \r
179                 //      CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->recv_wr[idx] );\r
180                 //      cl_atomic_dec( &socket_info->recv_cnt );\r
181                 //      cl_spinlock_release( &socket_info->recv_lock );\r
182                 //}\r
183 \r
184                 if( wc->status == IB_SUCCESS && p_recv_wr->ds_array[0].length >= 40 )\r
185                 {\r
186                         debug_dump_buffer( IBSP_DBG_WQ | IBSP_DBG_LEVEL4, "RECV",\r
187                                 (void * __ptr64)p_recv_wr->ds_array[0].vaddr, 40 );\r
188                 }\r
189 \r
190                 cl_atomic_dec( &g_ibsp.recv_count );\r
191                 cl_atomic_inc( &socket_info->recv_comp );\r
192 \r
193                 memset( p_recv_wr, 0x33, sizeof(struct _recv_wr) );\r
194         }\r
195         else\r
196         {\r
197                 // This code requires the send count to be decremented here, but it needs\r
198                 // to be decremented after any callbacks are invoked so socket destruction\r
199                 // gets delayed until all callbacks have been invoked.\r
200                 //{\r
201                 //      uint8_t idx;\r
202 \r
203                 //      cl_spinlock_acquire( &socket_info->send_lock );\r
204                 //      idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;\r
205                 //      if( idx >= QP_ATTRIB_SQ_DEPTH )\r
206                 //              idx += QP_ATTRIB_SQ_DEPTH;\r
207                 //      CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->send_wr[idx] );\r
208                 //      cl_atomic_dec( &socket_info->send_cnt );\r
209                 //      cl_spinlock_release( &socket_info->send_lock );\r
210                 //}\r
211 \r
212                 if( wc->wc_type == IB_WC_SEND )\r
213                 {\r
214                         cl_atomic_dec( &g_ibsp.send_count );\r
215                         cl_atomic_inc( &socket_info->send_comp );\r
216 \r
217                         fzprint(("%s():%d:0x%x:0x%x: send_count=%d\n",\r
218                                 __FUNCTION__,\r
219                                 __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), g_ibsp.send_count));\r
220                 }\r
221 \r
222                 memset( wr, 0x33, sizeof(struct _wr) );\r
223         }\r
224 #endif\r
225 \r
226         IBSP_TRACE4( IBSP_DBG_IO,\r
227                 ("overlapped=%p, InternalHigh=%d, hEvent=%x\n",\r
228                 lpOverlapped, lpOverlapped->InternalHigh,\r
229                 (uintptr_t) lpOverlapped->hEvent) );\r
230 \r
231         /* Don't notify the switch for that completion only if:\r
232          *   - the switch don't want a notification\r
233          *   - the wq completed with success\r
234          *   - the socket is still connected\r
235          */\r
236         if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )\r
237         {\r
238                 /* Indicate this operation is complete. The switch will poll\r
239                  * with calls to WSPGetOverlappedResult(). */\r
240 \r
241 #ifdef _DEBUG_\r
242                 cl_atomic_dec( &g_ibsp.overlap_h1_comp_count );\r
243 \r
244                 fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",\r
245                                  __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
246                                  GetCurrentThreadId(), lpOverlapped,\r
247                                  g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,\r
248                                  g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));\r
249 #endif\r
250 \r
251                 IBSP_TRACE1( IBSP_DBG_IO,\r
252                         ("Not calling lpWPUCompleteOverlappedRequest: "\r
253                         "socket=%p, ov=%p OffsetHigh=%d, InternalHigh=%d hEvent=%p\n",\r
254                         socket_info, lpOverlapped, lpOverlapped->OffsetHigh,\r
255                         lpOverlapped->InternalHigh, lpOverlapped->hEvent) );\r
256 \r
257                 lpOverlapped->Internal = 0;\r
258                 p_io_info->p_ov = NULL;\r
259         }\r
260         else\r
261         {\r
262 #ifdef _DEBUG_\r
263                 cl_atomic_dec( &g_ibsp.overlap_h0_count );\r
264 \r
265                 fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",\r
266                                  __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
267                                  GetCurrentThreadId(), lpOverlapped,\r
268                                  g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,\r
269                                  g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));\r
270 #endif\r
271 \r
272                 IBSP_TRACE1( IBSP_DBG_IO,\r
273                         ("Calling lpWPUCompleteOverlappedRequest: "\r
274                         "socket=%p, ov=%p OffsetHigh=%d InternalHigh=%d hEvent=%p\n",\r
275                         socket_info, lpOverlapped, lpOverlapped->OffsetHigh,\r
276                         lpOverlapped->InternalHigh, lpOverlapped->hEvent) );\r
277 \r
278                 p_io_info->p_ov = lpOverlapped;\r
279                 cl_atomic_inc( &socket_info->ref_cnt );\r
280         }\r
281 \r
282         if( wc->wc_type == IB_WC_RECV )\r
283                 cl_atomic_dec( &socket_info->recv_cnt );\r
284         else\r
285                 cl_atomic_dec( &socket_info->send_cnt );\r
286 \r
287         IBSP_EXIT( IBSP_DBG_IO );\r
288 }\r
289 \r
290 \r
/* CQ completion handler.
 *
 * Drains the completion queue attached to cq_tinfo: repeatedly polls up
 * to WC_LIST_SIZE work completions, lets complete_wq() fill in the
 * overlapped results, then performs the switch upcalls
 * (lpWPUCompleteOverlappedRequest) outside the poll loop.
 * Returns the number of completions processed (0 if the CQ was empty).
 */
int
ib_cq_comp(
					void						*cq_context )
{
	struct cq_thread_info	*cq_tinfo = cq_context;
	ib_api_status_t			status;
	ib_wc_t					wclist[WC_LIST_SIZE];
	ib_wc_t					*free_wclist;
	ib_wc_t					*done_wclist;
	io_comp_info_t			info[WC_LIST_SIZE];
	int						cb_idx;
	int						i;
	int						n_comp = 0;
#ifdef _DEBUG_
	int						comp_count;
#endif

	IBSP_ENTER( IBSP_DBG_WQ );

	CL_ASSERT( WC_LIST_SIZE >= 1 );

	do
	{
		/* Try to retrieve up to WC_LIST_SIZE completions at a time.
		 * Link the local array into the singly-linked free list that
		 * ib_poll_cq() consumes. */
		for( i = 0; i < (WC_LIST_SIZE - 1); i++ )
		{
			wclist[i].p_next = &wclist[i + 1];
		}
		wclist[(WC_LIST_SIZE - 1)].p_next = NULL;

		free_wclist = &wclist[0];
		done_wclist = NULL;

		status = ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist );

		IBSP_TRACE( IBSP_DBG_WQ,
			("%s():%d:0x%x:0x%x: poll CQ got status %d, free=%p, done=%p\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
			status, free_wclist, done_wclist) );

		switch( status )
		{
		case IB_NOT_FOUND:
		case IB_SUCCESS:
			break;

		case IB_INVALID_CQ_HANDLE:
			/* This happens when the switch closes the socket while the
			 * execution thread was calling lpWPUCompleteOverlappedRequest. */
			IBSP_ERROR(
				("ib_poll_cq returned IB_INVLALID_CQ_HANDLE\n") );
			goto done;

		default:
			IBSP_ERROR(
				("ib_poll_cq failed returned %s\n", ib_get_err_str( status )) );
			break;
		}

#ifdef _DEBUG_
		comp_count = 0;
#endif

		/* We have some completions.  First pass: record results in
		 * info[] without making any upcalls. */
		cb_idx = 0;
		while( done_wclist )
		{
#ifdef _DEBUG_
			comp_count++;
#endif
			complete_wq( done_wclist, &info[cb_idx++] );

			done_wclist = done_wclist->p_next;
		}

		/* Second pass: notify the switch for each completion that
		 * requested a callback (p_ov != NULL). */
		for( i = 0; i < cb_idx; i++ )
		{
			int error;
			int ret;

			if( info[i].p_ov )
			{
				IBSP_TRACE1( IBSP_DBG_IO,
					("Calling WPUCompleteOverlappedRequest for ov=%p\n",
					info[i].p_ov) );

				ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
					info[i].p_socket->switch_socket, info[i].p_ov,
					info[i].p_ov->OffsetHigh,
					(DWORD)info[i].p_ov->InternalHigh, &error );
				if( ret != 0 )
				{
					IBSP_ERROR( ("WPUCompleteOverlappedRequest for ov=%p "
						"returned %d err %d\n", info[i].p_ov, ret, error) );
				}
				/* Drop the reference taken in complete_wq(). */
				deref_socket_info( info[i].p_socket );
			}
		}

		n_comp += i;

#ifdef _DEBUG_
		if( comp_count > g_ibsp.max_comp_count )
		{
			g_ibsp.max_comp_count = comp_count;
		}
#endif
		/* free_wclist == NULL means every slot was consumed, so the CQ
		 * may still hold more completions: poll again. */
	} while( !free_wclist );

done:

#ifdef _DEBUG_
	fzprint(("%s():%d:0x%x:0x%x: overlap_h0_count=%d overlap_h1_count=%d\n",
			 __FUNCTION__,
			 __LINE__, GetCurrentProcessId(),
			 GetCurrentThreadId(), g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count));
#endif

	IBSP_EXIT( IBSP_DBG_WQ );
	return n_comp;
}
413 \r
414 \r
/* IB completion thread.
 *
 * Waits on the CQ's wait object, drains completions via ib_cq_comp(),
 * and re-arms the CQ, until ib_cq_thread_exit_wanted is set by
 * ib_destroy_cq_tinfo().  On exit this thread destroys the wait object
 * and frees cq_tinfo itself — after the exit flag is signaled, no other
 * thread may touch cq_tinfo.
 */
static DWORD WINAPI
ib_cq_thread(
					LPVOID						lpParameter )
{
	struct cq_thread_info	*cq_tinfo = (struct cq_thread_info *)lpParameter;
	cl_status_t				cl_status;
	ib_api_status_t			status;
	int						i;

	IBSP_ENTER( IBSP_DBG_HW );


	fzprint(("%s():%d:0x%x:0x%x: cq_tinfo=0x%p\n", __FUNCTION__,
			 __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo));

	do
	{
		cl_status = cl_waitobj_wait_on( cq_tinfo->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
		if( cl_status != CL_SUCCESS )
		{
			IBSP_ERROR(
				("cl_waitobj_wait_on() (%d)\n", cl_status) );
		}

		/*
		 * TODO: By rearanging thread creation and cq creation, this check
		 * may be eliminated.
		 */
		if( cq_tinfo->cq != NULL )
		{
			fzprint(("%s():%d:0x%x:0x%x: Calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

			/* Poll up to g_max_poll extra times after the CQ first goes
			 * empty; any productive poll resets the countdown.  This is
			 * the IBWSD_POLL behavior mentioned in the file header. */
			i = g_max_poll;
			do
			{
				if( ib_cq_comp( cq_tinfo ) )
					i = g_max_poll;

			} while( i-- );

			fzprint(("%s():%d:0x%x:0x%x: Done calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

			/* Request the next event notification before sleeping again. */
			status = ib_rearm_cq( cq_tinfo->cq, FALSE );
			if( status != IB_SUCCESS )
			{
				IBSP_ERROR(
					("ib_rearm_cq returned %s)\n", ib_get_err_str( status )) );
			}
		}

	} while( !cq_tinfo->ib_cq_thread_exit_wanted );

	/* Self-cleanup: this thread owns cq_tinfo from here on. */
	cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		IBSP_ERROR(
			("cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status)) );
	}
	HeapFree( g_ibsp.heap, 0, cq_tinfo );

	/* No special exit code, even on errors. */
	IBSP_EXIT( IBSP_DBG_HW );
	ExitThread( 0 );
}
482 \r
483 \r
/* Called with the HCA's CQ lock held.
 *
 * Allocates a cq_thread_info: wait object, completion thread, CQ of
 * IB_CQ_SIZE entries (armed), then links it into the HCA's circular
 * list and makes it the HCA's primary CQ.  Returns NULL on any failure;
 * partial allocations are torn down via ib_destroy_cq_tinfo(). */
static struct cq_thread_info *
ib_alloc_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info *cq_tinfo = NULL;
	ib_cq_create_t cq_create;
	ib_api_status_t status;
	cl_status_t cl_status;

	IBSP_ENTER( IBSP_DBG_HW );

	cq_tinfo = HeapAlloc(
		g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );

	if( !cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("HeapAlloc() Failed.\n") );
		return NULL;
	}

	/* Manual-reset FALSE: auto-reset event the CQ signals on completion. */
	cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		cq_tinfo->cq_waitobj = NULL;
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status)) );
		return NULL;
	}

	cq_tinfo->hca = hca;
	cq_tinfo->ib_cq_thread_exit_wanted = FALSE;

	/* Create a cleanup thread.  NOTE: the thread starts before the CQ
	 * exists, hence the cq != NULL check inside ib_cq_thread(). */
	cq_tinfo->ib_cq_thread = CreateThread( NULL, 0, ib_cq_thread, cq_tinfo, 0, (LPDWORD)&cq_tinfo->ib_cq_thread_id );

	if( cq_tinfo->ib_cq_thread == NULL )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( ("CreateThread failed (%d)", GetLastError()) );
		return NULL;
	}

	STAT_INC( thread_num );

	/* Completion queue */
	cq_create.size = IB_CQ_SIZE;

	/* Wait-object notification, not a callback. */
	cq_create.pfn_comp_cb = NULL;
	cq_create.h_wait_obj = cq_tinfo->cq_waitobj;

	status = ib_create_cq( hca->hca_handle, &cq_create, cq_tinfo,	/* context */
		NULL,	/* async handler */
		&cq_tinfo->cq );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_create_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	STAT_INC( cq_num );

	status = ib_rearm_cq( cq_tinfo->cq, FALSE );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	cq_tinfo->cqe_size = IB_CQ_SIZE;

	/* Link into the HCA's circular CQ list (self-linked if first). */
	if( hca->cq_tinfo )
	{
		__cl_primitive_insert(
			&hca->cq_tinfo->list_item, &cq_tinfo->list_item );
	}
	else
	{
		/* Setup the list entry to point to itself. */
		cq_tinfo->list_item.p_next = &cq_tinfo->list_item;
		cq_tinfo->list_item.p_prev = &cq_tinfo->list_item;
	}

	/* Upon allocation, the new CQ becomes the primary. */
	hca->cq_tinfo = cq_tinfo;

	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}
578 \r
579 \r
580 void\r
581 ib_destroy_cq_tinfo(\r
582                                         struct cq_thread_info           *cq_tinfo )\r
583 {\r
584         ib_wc_t wclist;\r
585         ib_wc_t *free_wclist;\r
586         ib_wc_t *done_wclist;\r
587         ib_api_status_t status;\r
588         HANDLE h_cq_thread;\r
589 \r
590         IBSP_ENTER( IBSP_DBG_HW );\r
591 \r
592         CL_ASSERT( cq_tinfo );\r
593         CL_ASSERT( cq_tinfo->qp_count == 0 );\r
594 \r
595         if( cq_tinfo->cq )\r
596         {\r
597                 wclist.p_next = NULL;\r
598                 free_wclist = &wclist;\r
599 \r
600                 while( ib_poll_cq(\r
601                         cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )\r
602                 {\r
603                         IBSP_TRACE1( IBSP_DBG_WQ,\r
604                                 ("free=%p, done=%p\n", free_wclist, done_wclist) );\r
605                 }\r
606 \r
607                 IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() start..\n") );\r
608 \r
609                 /*\r
610                  * Called from cleanup thread, okay to block.\r
611                  */\r
612                 status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );\r
613                 if( status )\r
614                 {\r
615                         IBSP_ERROR(\r
616                                 ("ib_destroy_cq returned %s\n", ib_get_err_str( status )) );\r
617                 }\r
618                 else\r
619                 {\r
620                         IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() finished.\n") );\r
621 \r
622                         cq_tinfo->cq = NULL;\r
623 \r
624                         STAT_DEC( cq_num );\r
625                 }\r
626         }\r
627 \r
628         if( cq_tinfo->ib_cq_thread )\r
629         {\r
630                 /* ib_cq_thread() will release the cq_tinfo before exit. Don't\r
631                    reference cq_tinfo after signaling  */\r
632                 h_cq_thread = cq_tinfo->ib_cq_thread;\r
633                 cq_tinfo->ib_cq_thread = NULL;\r
634 \r
635                 cq_tinfo->ib_cq_thread_exit_wanted = TRUE;\r
636                 cl_waitobj_signal( cq_tinfo->cq_waitobj );\r
637 \r
638                 /* Wait for ib_cq_thread to die, if we are not running on it */\r
639                 if( GetCurrentThreadId() != cq_tinfo->ib_cq_thread_id )\r
640                 {\r
641                         fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cq_thread=0x%x to die\n",\r
642                                          __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),\r
643                                          cq_tinfo->ib_cq_thread_id ));\r
644                         if( WaitForSingleObject( h_cq_thread, INFINITE ) != WAIT_OBJECT_0 )\r
645                         {\r
646                                 IBSP_ERROR( ("WaitForSingleObject failed\n") );\r
647                         }\r
648                         else\r
649                         {\r
650                                 STAT_DEC( thread_num );\r
651                         }\r
652                 }\r
653                 else\r
654                 {\r
655                         fzprint(("%s():%d:0x%x:0x%x: Currently on ib_cq_thread.\n", __FUNCTION__,\r
656                                          __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));\r
657                         STAT_DEC( thread_num );\r
658                 }\r
659                 CloseHandle( h_cq_thread );\r
660         }\r
661         else\r
662         {\r
663                 /* There was no thread created, destroy cq_waitobj and\r
664                    free memory */\r
665                 if( cq_tinfo->cq_waitobj )\r
666                 {\r
667                         cl_waitobj_destroy( cq_tinfo->cq_waitobj );\r
668                         cq_tinfo->cq_waitobj = NULL;\r
669                 }\r
670                 HeapFree( g_ibsp.heap, 0, cq_tinfo );\r
671         }\r
672 \r
673         IBSP_EXIT( IBSP_DBG_HW );\r
674 }\r
675 \r
676 \r
/* Acquires a CQ for a new QP under the HCA's cq_lock.
 *
 * Uses (or lazily creates) the HCA's primary cq_tinfo, growing the CQ
 * by IB_CQ_SIZE entries per attached QP.  If the resize exceeds the
 * HCA's supported CQ size (IB_INVALID_CQ_SIZE), a fresh cq_tinfo is
 * allocated and becomes the primary.  Returns the cq_tinfo with
 * qp_count already incremented, or NULL on failure. */
static struct cq_thread_info *
ib_acquire_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info	*cq_tinfo = NULL;
	uint32_t				cqe_size;
	ib_api_status_t			status;

	IBSP_ENTER( IBSP_DBG_HW );

	cl_spinlock_acquire( &hca->cq_lock );

	if( !hca->cq_tinfo )
	{
		cq_tinfo = ib_alloc_cq_tinfo( hca );
		if( !cq_tinfo )
		{
			IBSP_ERROR_EXIT( ("ib_alloc_cq_tinfo() failed\n") );
			cl_spinlock_release( &hca->cq_lock );
			return (NULL);
		}
	}
	else
	{
		cq_tinfo = hca->cq_tinfo;
	}

	CL_ASSERT( cq_tinfo != NULL );

	/* Required capacity: one IB_CQ_SIZE slice per QP, including the
	 * QP being added. */
	cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;

	if( cq_tinfo->cqe_size < cqe_size )
	{
		status = ib_modify_cq( cq_tinfo->cq, &cqe_size );
		switch( status )
		{
		case IB_INVALID_CQ_SIZE:
			/* CQ can't grow any further: start a new one (it becomes
			 * the HCA's primary inside ib_alloc_cq_tinfo). */
			cq_tinfo = ib_alloc_cq_tinfo( hca );
			if( !cq_tinfo )
				break;

			cq_tinfo->qp_count++;
			break;

		case IB_SUCCESS:
			cq_tinfo->cqe_size = cqe_size;

			cq_tinfo->qp_count++;

			fzprint(("%s():%d:0x%x:0x%x: New cq size=%d.\n",
					 __FUNCTION__,
					 __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), cq_tinfo->cqe_size));
			break;

		default:
			IBSP_ERROR_EXIT(
				("ib_modify_cq() returned %s\n", ib_get_err_str(status)) );
			cq_tinfo = NULL;
		}
	}
	else
	{
		/* Existing CQ is already large enough. */
		cq_tinfo->qp_count++;
	}

	cl_spinlock_release( &hca->cq_lock );
	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}
747 \r
748 void\r
749 ib_release_cq_tinfo(\r
750                                         struct cq_thread_info           *cq_tinfo )\r
751 {\r
752         IBSP_ENTER( IBSP_DBG_HW );\r
753 \r
754         CL_ASSERT( cq_tinfo );\r
755         CL_ASSERT( cq_tinfo->hca );\r
756 \r
757         cl_spinlock_acquire( &cq_tinfo->hca->cq_lock );\r
758         /* If this CQ now has fewer QPs than the primary, make it the primary. */\r
759         if( --cq_tinfo->qp_count < cq_tinfo->hca->cq_tinfo->qp_count )\r
760                 cq_tinfo->hca->cq_tinfo = cq_tinfo;\r
761         cl_spinlock_release( &cq_tinfo->hca->cq_lock );\r
762 \r
763         IBSP_EXIT( IBSP_DBG_HW );\r
764 }\r
765 \r
766 \r
/* Release IB resources.
 *
 * Global provider teardown: the order matters.  PNP callbacks are
 * stopped first so no new HCAs/ports appear while we destroy the
 * existing ones, then the access layer is closed, and finally the
 * cached IP address map is freed.  Safe to call even if
 * ibsp_initialize() failed part-way (al_handle NULL is tolerated). */
void
ib_release(void)
{
	cl_fmap_item_t			*p_item;

	IBSP_ENTER( IBSP_DBG_HW );

	if( g_ibsp.al_handle )
	{
		cl_list_item_t *item;
		ib_api_status_t status;

		/* Stop PNP event delivery before tearing down HCA state. */
		unregister_pnp();

		/* Destroy every HCA still on the list; pnp_ca_remove() unlinks
		 * the item, so re-reading the head each pass drains the list. */
		while( (item = cl_qlist_head( &g_ibsp.hca_list )) != cl_qlist_end( &g_ibsp.hca_list ) )
		{
			struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);

			pnp_ca_remove( hca );
		}

		fzprint(("%s():%d:0x%x:0x%x: Calling ib_close_al...\n", __FUNCTION__,
				 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

		status = ib_close_al( g_ibsp.al_handle );

		fzprint(("%s():%d:0x%x:0x%x: Done calling ib_close_al, status=%d.\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 status));
		if( status != IB_SUCCESS )
		{
			/* Nothing we can do about a close failure here - just log it. */
			IBSP_ERROR(
				("ib_close_al returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_TRACE( IBSP_DBG_HW, ("ib_close_al success\n") );
			STAT_DEC( al_num );
		}
		/* Mark the AL closed regardless, so a second call is a no-op. */
		g_ibsp.al_handle = NULL;
	}

	/* Free the cached IP address entries; remove-then-free, always
	 * re-reading the head since removal invalidates the iterator. */
	for( p_item = cl_fmap_head( &g_ibsp.ip_map );
		p_item != cl_fmap_end( &g_ibsp.ip_map );
		p_item = cl_fmap_head( &g_ibsp.ip_map ) )
	{
		cl_fmap_remove_item( &g_ibsp.ip_map, p_item );

		HeapFree( g_ibsp.heap, 0,
			PARENT_STRUCT(p_item, struct ibsp_ip_addr, item) );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}
822 \r
823 \r
824 /* Initialize IB ressources. */\r
825 int\r
826 ibsp_initialize(void)\r
827 {\r
828         ib_api_status_t status;\r
829         int ret;\r
830 \r
831         IBSP_ENTER( IBSP_DBG_HW );\r
832 \r
833         CL_ASSERT( g_ibsp.al_handle == NULL );\r
834         CL_ASSERT( cl_qlist_count( &g_ibsp.hca_list ) == 0 );\r
835 \r
836         /* Open the IB library */\r
837         status = ib_open_al( &g_ibsp.al_handle );\r
838 \r
839         IBSP_TRACE( IBSP_DBG_HW, ("open is %d %p\n", status, g_ibsp.al_handle) );\r
840 \r
841         if( status != IB_SUCCESS )\r
842         {\r
843                 IBSP_ERROR( ("ib_open_al failed (%d)\n", status) );\r
844                 ret = WSAEPROVIDERFAILEDINIT;\r
845                 goto done;\r
846         }\r
847 \r
848         STAT_INC( al_num );\r
849 \r
850         /* Register for PNP events */\r
851         status = register_pnp();\r
852         if( status )\r
853         {\r
854                 IBSP_ERROR( ("register_pnp failed (%d)\n", status) );\r
855                 ret = WSAEPROVIDERFAILEDINIT;\r
856                 goto done;\r
857         }\r
858 \r
859         /* Populate IP list. */\r
860         update_all_ip_addrs();\r
861 \r
862         STAT_INC( thread_num );\r
863 \r
864         ret = 0;\r
865 done:\r
866         if( ret )\r
867         {\r
868                 /* Free up resources. */\r
869                 ib_release();\r
870         }\r
871 \r
872         IBSP_EXIT( IBSP_DBG_HW );\r
873 \r
874         return ret;\r
875 }\r
876 \r
877 \r
878 /* Destroys the infiniband ressources of a socket. */\r
879 void\r
880 ib_destroy_socket(\r
881         IN      OUT                     struct ibsp_socket_info         *socket_info )\r
882 {\r
883         ib_api_status_t status;\r
884 \r
885         IBSP_ENTER( IBSP_DBG_EP );\r
886 \r
887         if( socket_info->qp )\r
888         {\r
889                 cl_atomic_inc( &socket_info->ref_cnt );\r
890                 status = ib_destroy_qp( socket_info->qp, deref_socket_info );\r
891                 if( status != IB_SUCCESS )\r
892                 {\r
893                         IBSP_ERROR( ("ib_destroy_qp returned %s\n",\r
894                                 ib_get_err_str( status )) );\r
895                         deref_socket_info( socket_info );\r
896                 }\r
897 \r
898                 ib_release_cq_tinfo( socket_info->cq_tinfo );\r
899 \r
900                 socket_info->qp = NULL;\r
901         }\r
902 \r
903         IBSP_EXIT( IBSP_DBG_EP );\r
904 }\r
905 \r
906 \r
907 /*\r
908  * Creates the necessary IB ressources for a socket\r
909  */\r
910 int\r
911 ib_create_socket(\r
912         IN      OUT                     struct ibsp_socket_info         *socket_info)\r
913 {\r
914         ib_qp_create_t                  qp_create;\r
915         ib_api_status_t                 status;\r
916         ib_qp_attr_t                    qp_attr;\r
917 \r
918         IBSP_ENTER( IBSP_DBG_EP );\r
919 \r
920         CL_ASSERT( socket_info != NULL );\r
921         CL_ASSERT( socket_info->port != NULL );\r
922         CL_ASSERT( socket_info->qp == NULL );\r
923 \r
924         socket_info->hca_pd = socket_info->port->hca->pd;\r
925 \r
926         /* Get the completion queue and thread info for this socket */\r
927         socket_info->cq_tinfo = ib_acquire_cq_tinfo( socket_info->port->hca );\r
928         if( !socket_info->cq_tinfo )\r
929         {\r
930                 IBSP_ERROR_EXIT( ("ib_acquire_cq_tinfo failed\n") );\r
931                 return WSAENOBUFS;\r
932         }\r
933 \r
934         /* Queue pair */\r
935         qp_create.qp_type = IB_QPT_RELIABLE_CONN;\r
936         qp_create.sq_depth = QP_ATTRIB_SQ_DEPTH;\r
937         qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;\r
938         qp_create.sq_sge = QP_ATTRIB_SQ_SGE;\r
939         qp_create.rq_sge = 1;\r
940         qp_create.h_rq_cq = socket_info->cq_tinfo->cq;\r
941         qp_create.h_sq_cq = socket_info->cq_tinfo->cq;\r
942         qp_create.sq_signaled = TRUE;\r
943 \r
944         status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,    /* context */\r
945                 NULL,   /* async handler */\r
946                 &socket_info->qp );\r
947         if( status )\r
948         {\r
949                 ib_release_cq_tinfo( socket_info->cq_tinfo );\r
950                 IBSP_ERROR_EXIT(\r
951                         ("ib_create_qp returned %s\n", ib_get_err_str( status )) );\r
952                 return WSAENOBUFS;\r
953         }\r
954 \r
955         status = ib_query_qp( socket_info->qp, &qp_attr );\r
956         if( status == IB_SUCCESS )\r
957         {\r
958                 socket_info->max_inline = min( g_max_inline, qp_attr.sq_max_inline );\r
959         }\r
960         else\r
961         {\r
962                 IBSP_ERROR( ("ib_query_qp returned %s\n", ib_get_err_str( status )) );\r
963                 socket_info->max_inline = 0;\r
964         }\r
965 \r
966         STAT_INC( qp_num );\r
967 \r
968         IBSP_EXIT( IBSP_DBG_EP );\r
969         return 0;\r
970 }\r
971 \r
972 \r
973 void\r
974 wait_cq_drain(\r
975         IN      OUT                     struct ibsp_socket_info         *socket_info )\r
976 {\r
977         IBSP_ENTER( IBSP_DBG_EP );\r
978 \r
979         if( socket_info->cq_tinfo == NULL )\r
980         {\r
981                 IBSP_EXIT( IBSP_DBG_EP );\r
982                 return;\r
983         }\r
984 \r
985         /* Wait for the QP to be drained. */\r
986         while( socket_info->send_cnt || socket_info->recv_cnt )\r
987         {\r
988                 fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr_list_count=%d qp state=%d\n",\r
989                                  __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),\r
990                                  socket_info, cl_qlist_count(&socket_info->wr_list)));\r
991 \r
992                 Sleep(100);\r
993         }\r
994 \r
995         IBSP_EXIT( IBSP_DBG_EP );\r
996 }\r
997 \r
998 \r
999 void\r
1000 ibsp_dup_overlap_abort(\r
1001         IN      OUT                     struct ibsp_socket_info         *socket_info )\r
1002 {\r
1003         LPWSAOVERLAPPED lpOverlapped = NULL;\r
1004         int error;\r
1005         int ret;\r
1006         uint8_t                         idx;\r
1007 \r
1008         IBSP_ENTER( IBSP_DBG_EP );\r
1009         CL_ASSERT( !socket_info->send_cnt && !socket_info->recv_cnt );\r
1010 \r
1011         /* Browse the list of all posted overlapped structures\r
1012          * to mark them as aborted. */\r
1013         idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;\r
1014         if( idx >= QP_ATTRIB_RQ_DEPTH )\r
1015                 idx += QP_ATTRIB_RQ_DEPTH;\r
1016 \r
1017         while( socket_info->dup_cnt )\r
1018         {\r
1019                 lpOverlapped = socket_info->dup_wr[idx].wr.lpOverlapped;\r
1020 \r
1021                 fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p Internal=%d InternalHigh=%d hEvent=%d\n",\r
1022                         __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), socket_info, &socket_info->dup_wr[idx], lpOverlapped, lpOverlapped->Internal, lpOverlapped->InternalHigh, lpOverlapped->hEvent));\r
1023 \r
1024                 lpOverlapped->OffsetHigh = WSAECONNABORTED;\r
1025                 lpOverlapped->InternalHigh = 0;\r
1026 \r
1027                 if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )\r
1028                 {\r
1029                         /* Indicate this operation is complete. The switch will poll\r
1030                          * with calls to WSPGetOverlappedResult(). */\r
1031 #ifdef _DEBUG_\r
1032                         cl_atomic_dec(&g_ibsp.overlap_h1_comp_count);\r
1033 \r
1034                         fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",\r
1035                                          __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
1036                                          GetCurrentThreadId(), lpOverlapped,\r
1037                                          g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,\r
1038                                          g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));\r
1039 #endif\r
1040 \r
1041                         CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,\r
1042                                          ("%s: set internal overlapped=0x%p Internal=%d OffsetHigh=%d\n",\r
1043                                           __FUNCTION__, lpOverlapped, lpOverlapped->Internal,\r
1044                                           lpOverlapped->OffsetHigh));\r
1045 \r
1046                         lpOverlapped->Internal = 0;\r
1047                 }\r
1048                 else\r
1049                 {\r
1050 #ifdef _DEBUG_\r
1051                         cl_atomic_dec(&g_ibsp.overlap_h0_count);\r
1052 \r
1053 \r
1054                         fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",\r
1055                                          __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
1056                                          GetCurrentThreadId(), lpOverlapped,\r
1057                                          g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,\r
1058                                          g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));\r
1059 #endif\r
1060                         CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,\r
1061                                          ("%s: calls lpWPUCompleteOverlappedRequest, overlapped=0x%p OffsetHigh=%d InternalHigh=%d hEvent=%d\n",\r
1062                                           __FUNCTION__, lpOverlapped, lpOverlapped->OffsetHigh,\r
1063                                           lpOverlapped->InternalHigh, lpOverlapped->hEvent));\r
1064 \r
1065                         ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest\r
1066                                 (socket_info->switch_socket,\r
1067                                  lpOverlapped,\r
1068                                  lpOverlapped->OffsetHigh, (DWORD) lpOverlapped->InternalHigh, &error);\r
1069 \r
1070                         if( ret != 0 )\r
1071                         {\r
1072                                 CL_ERROR(IBSP_DBG_EP, gdbg_lvl,\r
1073                                                  ("lpWPUCompleteOverlappedRequest failed with %d/%d\n", ret,\r
1074                                                   error));\r
1075                         }\r
1076                 }\r
1077                 cl_atomic_dec( &socket_info->dup_cnt );\r
1078         }\r
1079 \r
1080         IBSP_EXIT( IBSP_DBG_EP );\r
1081 }\r
1082 \r
1083 \r
/* Closes a connection and releases its resources.
 *
 * Final teardown of a socket: transitions it to IBSP_CLOSED, unlinks it
 * from all global structures (list + connection map, chosen by the state
 * it was in), aborts/flushes outstanding work and drops the initial
 * reference.  The socket may be freed by deref_socket_info() on return.
 */
void
shutdown_and_destroy_socket_info(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	enum ibsp_socket_state	old_state;

	IBSP_ENTER( IBSP_DBG_EP );

	/* Atomically capture the previous state while switching to CLOSED;
	 * old_state decides the map cleanup below. */
	cl_spinlock_acquire( &socket_info->mutex );
	old_state = socket_info->socket_state;
	IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CLOSED );
	cl_spinlock_release( &socket_info->mutex );

	/* Close the shared-memory mapping used for socket duplication. */
	if( socket_info->duplicate.mmap_handle )
	{
		CloseHandle( socket_info->duplicate.mmap_handle );
		socket_info->duplicate.mmap_handle = NULL;
	}

	if( socket_info->listen.handle )
	{
		/* Stop listening and reject queued connections. */
		ib_listen_cancel( socket_info );
	}

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	cl_qlist_remove_item( &g_ibsp.socket_info_list, &socket_info->item );

	/* NOTE(review): no default case - states other than those listed
	 * skip the map removal; confirm those states never reach here. */
	switch( old_state )
	{
	case IBSP_CREATE:
	case IBSP_LISTEN:
		/* Nothing to do. */
		break;

	case IBSP_CONNECTED:
		{
			struct disconnect_reason reason;

			/* Actively disconnect before leaving the map. */
			memset( &reason, 0, sizeof(reason) );
			reason.type = DISC_SHUTDOWN;
			ib_disconnect( socket_info, &reason );
		}
		/* Fall through. */

	case IBSP_CONNECT:
	case IBSP_DISCONNECTED:
		/* We changed the state - remove from connection map. */
		CL_ASSERT( socket_info->conn_item.p_map );
		cl_rbmap_remove_item( &g_ibsp.conn_map, &socket_info->conn_item );
		break;
	}
	cl_spinlock_release( &g_ibsp.socket_info_mutex );

	/* Flush all completions. */
	if( socket_info->dup_cnt )
		ibsp_dup_overlap_abort( socket_info );

	/* Reap CQ completions until all posted sends/receives are done. */
	while( socket_info->send_cnt || socket_info->recv_cnt )
		ib_cq_comp( socket_info->cq_tinfo );

	/* Unregister memory, then destroy the QP/CQ resources. */
	ibsp_dereg_socket( socket_info );

	ib_destroy_socket( socket_info );

	/* Release the initial reference and clean up. */
	deref_socket_info( socket_info );

	IBSP_EXIT( IBSP_DBG_EP );
}
1155 \r
1156 \r
1157 boolean_t\r
1158 ibsp_conn_insert(\r
1159         IN                              struct ibsp_socket_info         *s )\r
1160 {\r
1161         struct ibsp_socket_info         *p_sock;\r
1162         cl_rbmap_item_t                         *p_item, *p_insert_at;\r
1163         boolean_t                                       left = TRUE;\r
1164 \r
1165         p_item = cl_rbmap_root( &g_ibsp.conn_map );\r
1166         p_insert_at = p_item;\r
1167 \r
1168         cl_spinlock_acquire( &g_ibsp.socket_info_mutex );\r
1169         CL_ASSERT( !s->conn_item.p_map );\r
1170         while( p_item != cl_rbmap_end( &g_ibsp.conn_map ) )\r
1171         {\r
1172                 p_insert_at = p_item;\r
1173                 p_sock = PARENT_STRUCT( p_item, struct ibsp_socket_info, conn_item );\r
1174                 if( p_sock->local_addr.sin_family < s->local_addr.sin_family )\r
1175                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1176                 else if( p_sock->local_addr.sin_family > s->local_addr.sin_family )\r
1177                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1178                 else if( p_sock->local_addr.sin_addr.S_un.S_addr < s->local_addr.sin_addr.S_un.S_addr )\r
1179                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1180                 else if( p_sock->local_addr.sin_addr.S_un.S_addr > s->local_addr.sin_addr.S_un.S_addr )\r
1181                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1182                 else if( p_sock->local_addr.sin_port < s->local_addr.sin_port )\r
1183                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1184                 else if( p_sock->local_addr.sin_port > s->local_addr.sin_port )\r
1185                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1186                 else if( p_sock->peer_addr.sin_family < s->peer_addr.sin_family )\r
1187                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1188                 else if( p_sock->peer_addr.sin_family > s->peer_addr.sin_family )\r
1189                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1190                 else if( p_sock->peer_addr.sin_addr.S_un.S_addr < s->peer_addr.sin_addr.S_un.S_addr )\r
1191                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1192                 else if( p_sock->peer_addr.sin_addr.S_un.S_addr > s->peer_addr.sin_addr.S_un.S_addr )\r
1193                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1194                 else if( p_sock->peer_addr.sin_port < s->peer_addr.sin_port )\r
1195                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1196                 else if( p_sock->peer_addr.sin_port > s->peer_addr.sin_port )\r
1197                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1198                 else\r
1199                         goto done;\r
1200         }\r
1201 \r
1202         cl_rbmap_insert( &g_ibsp.conn_map, p_insert_at, &s->conn_item, left );\r
1203 \r
1204 done:\r
1205         cl_spinlock_release( &g_ibsp.socket_info_mutex );\r
1206         return p_item == cl_rbmap_end( &g_ibsp.conn_map );\r
1207 }\r
1208 \r
1209 \r
/* Removes a socket from the global connection map.
 * The socket must currently be in the map (asserted); callers hold no
 * locks - the global socket_info_mutex is taken here. */
void
ibsp_conn_remove(
	IN				struct ibsp_socket_info		*s )
{
	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( s->conn_item.p_map );
	cl_rbmap_remove_item( &g_ibsp.conn_map, &s->conn_item );
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
}