[WSD] Support HCAs that don't implement CQ resize.
ulp/wsd/user/ibsp_iblow.c
/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include "ibspdll.h"

#ifdef PERFMON_ENABLED
#include "ibsp_perfmon.h"
#endif

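/* Deferred-completion record: complete_wq() fills in the socket and the
 * overlapped structure for each reaped work request so that ib_cq_comp()
 * can make the WPUCompleteOverlappedRequest upcalls after the polling
 * pass, outside the completion routine itself. */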
typedef struct _io_comp_info
{
	struct ibsp_socket_info	*p_socket;
	LPWSAOVERLAPPED			p_ov;

} io_comp_info_t;


/* Work queue entry completion routine. */
static void
complete_wq(
	IN		const	ib_wc_t						*wc,
		OUT			io_comp_info_t				*p_io_info )
{
	struct _wr				*wr = NULL;
	struct _recv_wr			*p_recv_wr = NULL;
	LPWSAOVERLAPPED			lpOverlapped = NULL;
	struct ibsp_socket_info	*socket_info = NULL;

	IBSP_ENTER( IBSP_DBG_IO );

	wr = (struct _wr * __ptr64)wc->wr_id;
	p_recv_wr = (struct _recv_wr * __ptr64)wc->wr_id;

	CL_ASSERT( wr );

	socket_info = wr->socket_info;
	p_io_info->p_socket = socket_info;

	lpOverlapped = wr->lpOverlapped;

	IBSP_TRACE4( IBSP_DBG_IO,
		("socket %p, ov %p, work completion status=%s, wc_type=%s\n",
		socket_info, lpOverlapped, ib_get_wc_status_str( wc->status ),
		ib_get_wc_type_str( wc->wc_type )) );

	/* Set the Windows error code. There is no clean correspondence
	 * between the IBAL error codes and the Windows error codes, but it
	 * probably does not matter, as long as an error is returned. */
	switch( wc->status )
	{
	case IB_WCS_SUCCESS:
		/*
		 * Set the length of the operation. Under InfiniBand, the work
		 * completion length is only valid for a receive
		 * operation. Fortunately we had already set the length during the
		 * send operation.
		 *
		 * lpWPUCompleteOverlappedRequest is supposed to store the length
		 * into InternalHigh, however it will not be called if the low
		 * order bit of lpOverlapped->hEvent is set. So we do it and hope
		 * for the best.
		 *
		 * NOTE: Without a valid length, the switch doesn't seem to call
		 * GetOverlappedResult() even if we call lpWPUCompleteOverlappedRequest()
		 */
		switch ( wc->wc_type )
		{
		case IB_WC_RECV:
			lpOverlapped->InternalHigh = wc->length;
#ifdef IBSP_LOGGING
			cl_spinlock_acquire( &socket_info->recv_lock );
			DataLogger_WriteData(&socket_info->RecvDataLogger,
				p_recv_wr->idx, (void * __ptr64)p_recv_wr->ds_array[0].vaddr,
				wc->length);
			cl_spinlock_release( &socket_info->recv_lock );
#endif
#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_RECV] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_RECV],
				lpOverlapped->InternalHigh );
#endif
			break;
#ifdef PERFMON_ENABLED

		case IB_WC_RDMA_READ:
			lpOverlapped->InternalHigh = wc->length;
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_RECV] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_READ],
				lpOverlapped->InternalHigh );
			break;

		case IB_WC_SEND:
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_SEND] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_SEND],
				lpOverlapped->InternalHigh );
			break;

		case IB_WC_RDMA_WRITE:
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_SEND] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_WRITE],
				lpOverlapped->InternalHigh );
			break;

		default:
			break;
		}

#endif /* PERFMON_ENABLED */

		lpOverlapped->OffsetHigh = 0;
		break;

	case IB_WCS_WR_FLUSHED_ERR:
		cl_spinlock_acquire( &socket_info->mutex );

		if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE &&
			wc->wc_type == IB_WC_RECV )
		{
			/*
			 * Take the wr off the wr_list, and place onto the
			 * dup_wr_list.  We will post them later on the new QP.
			 */
			cl_spinlock_acquire( &socket_info->recv_lock );

			/* Copy to the duplicate WR array. */
			socket_info->dup_wr[socket_info->dup_idx] = *p_recv_wr;

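			/* When QP_ATTRIB_RQ_DEPTH is a power of two, the ring index
			 * can wrap with a bitwise AND instead of a compare-and-reset;
			 * e.g. with a depth of 8, (7 + 1) & 7 == 0. */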
#if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \
	QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \
	QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8
			socket_info->dup_idx++;
			socket_info->dup_idx &= (QP_ATTRIB_RQ_DEPTH - 1);
#else
			if( ++socket_info->dup_idx == QP_ATTRIB_RQ_DEPTH )
				socket_info->dup_idx = 0;
#endif

			cl_atomic_inc( &socket_info->dup_cnt );
			/* The early return below skips the decrement at the end of
			 * this function, so drop the receive count here. */
			cl_atomic_dec( &socket_info->recv_cnt );

			cl_spinlock_release( &socket_info->recv_lock );

			cl_spinlock_release( &socket_info->mutex );
			p_io_info->p_ov = NULL;
			IBSP_EXIT( IBSP_DBG_IO );
			return;
		}

		/* If the receive buffers were flushed on purpose (socket
		 * duplication), don't report an error. */
		if( socket_info->socket_state == IBSP_DUPLICATING_OLD )
			wr->lpOverlapped->OffsetHigh = 0;
		else
			wr->lpOverlapped->OffsetHigh = WSA_OPERATION_ABORTED;

		cl_spinlock_release( &socket_info->mutex );

		/* Override the length, as per the WSD specs. */
		wr->lpOverlapped->InternalHigh = 0;
		break;

	case IB_WCS_LOCAL_LEN_ERR:
	case IB_WCS_LOCAL_OP_ERR:
	case IB_WCS_LOCAL_PROTECTION_ERR:
	case IB_WCS_MEM_WINDOW_BIND_ERR:
	case IB_WCS_REM_ACCESS_ERR:
	case IB_WCS_REM_OP_ERR:
	case IB_WCS_RNR_RETRY_ERR:
	case IB_WCS_TIMEOUT_RETRY_ERR:
	case IB_WCS_REM_INVALID_REQ_ERR:
	default:
		IBSP_ERROR( ("%s error: %s\n",
			ib_get_wc_type_str( wc->wc_type ),
			ib_get_wc_status_str( wc->status )) );
		lpOverlapped->OffsetHigh = WSAECONNABORTED;
		wr->lpOverlapped->InternalHigh = 0;
		socket_info->qp_error = WSAECONNABORTED;
		break;
	}

#ifdef PERFMON_ENABLED
	InterlockedIncrement64( &g_pm_stat.pdata[COMP_TOTAL] );
#endif

#ifdef _DEBUG_
	if( wc->wc_type == IB_WC_RECV )
	{
		// This code requires the recv count to be decremented here, but it needs
		// to be decremented after any callbacks are invoked so socket destruction
		// gets delayed until all callbacks have been invoked.
		//{
		//	uint8_t	idx;

		//	cl_spinlock_acquire( &socket_info->recv_lock );
		//	idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;
		//	if( idx >= QP_ATTRIB_RQ_DEPTH )
		//		idx += QP_ATTRIB_RQ_DEPTH;

		//	CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->recv_wr[idx] );
		//	cl_atomic_dec( &socket_info->recv_cnt );
		//	cl_spinlock_release( &socket_info->recv_lock );
		//}

		if( wc->status == IB_WCS_SUCCESS && p_recv_wr->ds_array[0].length >= 40 )
		{
			debug_dump_buffer( IBSP_DBG_WQ | IBSP_DBG_LEVEL4, "RECV",
				(void * __ptr64)p_recv_wr->ds_array[0].vaddr, 40 );
		}

		cl_atomic_dec( &g_ibsp.recv_count );
		cl_atomic_inc( &socket_info->recv_comp );

		memset( p_recv_wr, 0x33, sizeof(struct _recv_wr) );
	}
	else
	{
		// This code requires the send count to be decremented here, but it needs
		// to be decremented after any callbacks are invoked so socket destruction
		// gets delayed until all callbacks have been invoked.
		//{
		//	uint8_t	idx;

		//	cl_spinlock_acquire( &socket_info->send_lock );
		//	idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;
		//	if( idx >= QP_ATTRIB_SQ_DEPTH )
		//		idx += QP_ATTRIB_SQ_DEPTH;
		//	CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->send_wr[idx] );
		//	cl_atomic_dec( &socket_info->send_cnt );
		//	cl_spinlock_release( &socket_info->send_lock );
		//}

		if( wc->wc_type == IB_WC_SEND )
		{
			cl_atomic_dec( &g_ibsp.send_count );
			cl_atomic_inc( &socket_info->send_comp );

			fzprint(("%s():%d:0x%x:0x%x: send_count=%d\n",
				__FUNCTION__,
				__LINE__, GetCurrentProcessId(), GetCurrentThreadId(), g_ibsp.send_count));
		}

		memset( wr, 0x33, sizeof(struct _wr) );
	}
#endif

	IBSP_TRACE4( IBSP_DBG_IO,
		("overlapped=%p, InternalHigh=%d, hEvent=%x\n",
		lpOverlapped, lpOverlapped->InternalHigh,
		(uintptr_t) lpOverlapped->hEvent) );


	/* Skip notifying the switch of this completion only when the switch
	 * has asked not to be notified, which it signals by setting the
	 * low-order bit of lpOverlapped->hEvent. */
	if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
	{
		/* Indicate this operation is complete. The switch will poll
		 * with calls to WSPGetOverlappedResult(). */

#ifdef _DEBUG_
		cl_atomic_dec( &g_ibsp.overlap_h1_comp_count );

		fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(),
				 GetCurrentThreadId(), lpOverlapped,
				 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
				 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

		IBSP_TRACE1( IBSP_DBG_IO,
			("Not calling lpWPUCompleteOverlappedRequest: "
			"socket=%p, ov=%p OffsetHigh=%d, InternalHigh=%d hEvent=%p\n",
			socket_info, lpOverlapped, lpOverlapped->OffsetHigh,
			lpOverlapped->InternalHigh, lpOverlapped->hEvent) );

		lpOverlapped->Internal = 0;
		p_io_info->p_ov = NULL;
	}
	else
	{
#ifdef _DEBUG_
		cl_atomic_dec( &g_ibsp.overlap_h0_count );

		fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(),
				 GetCurrentThreadId(), lpOverlapped,
				 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
				 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

		p_io_info->p_ov = lpOverlapped;
		cl_atomic_inc( &socket_info->ref_cnt );
	}

	if( wc->wc_type == IB_WC_RECV )
	{
		cl_atomic_dec( &socket_info->recv_cnt );
	}
	else
	{
		cl_atomic_dec( &socket_info->send_cnt );
	}

	IBSP_EXIT( IBSP_DBG_IO );
}


/* CQ completion handler. Returns the number of completions processed. */
int
ib_cq_comp(
					void						*cq_context )
{
	struct cq_thread_info	*cq_tinfo = cq_context;
	ib_api_status_t			status;
	ib_wc_t					wclist[WC_LIST_SIZE];
	ib_wc_t					*free_wclist;
	ib_wc_t					*done_wclist;
	io_comp_info_t			info[WC_LIST_SIZE];
	int						cb_idx;
	int						i;
	int						n_comp = 0;
#ifdef _DEBUG_
	int						comp_count;
#endif

	IBSP_ENTER( IBSP_DBG_WQ );

	CL_ASSERT( WC_LIST_SIZE >= 1 );

	do
	{
		/* Try to retrieve up to WC_LIST_SIZE completions at a time. */
		for( i = 0; i < (WC_LIST_SIZE - 1); i++ )
		{
			wclist[i].p_next = &wclist[i + 1];
		}
		wclist[(WC_LIST_SIZE - 1)].p_next = NULL;

		free_wclist = &wclist[0];
		done_wclist = NULL;

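		/* On return, ib_poll_cq leaves free_wclist pointing at the unused
		 * entries (NULL if every entry was consumed) and chains the reaped
		 * completions onto done_wclist. */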
		status = ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist );

		IBSP_TRACE( IBSP_DBG_WQ,
			("%s():%d:0x%x:0x%x: poll CQ got status %d, free=%p, done=%p\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
			status, free_wclist, done_wclist) );

		switch( status )
		{
		case IB_NOT_FOUND:
		case IB_SUCCESS:
			break;

		case IB_INVALID_CQ_HANDLE:
			/* This happens when the switch closes the socket while the
			 * execution thread was calling lpWPUCompleteOverlappedRequest. */
			IBSP_ERROR(
				("ib_poll_cq returned IB_INVALID_CQ_HANDLE\n") );
			goto done;

		default:
			IBSP_ERROR(
				("ib_poll_cq failed, returned %s\n", ib_get_err_str( status )) );
			break;
		}

#ifdef _DEBUG_
		comp_count = 0;
#endif

		/* We have some completions. */
		cb_idx = 0;
		while( done_wclist )
		{
#ifdef _DEBUG_
			comp_count++;
#endif
			complete_wq( done_wclist, &info[cb_idx++] );

			done_wclist = done_wclist->p_next;
		}

		for( i = 0; i < cb_idx; i++ )
		{
			int error;
			int ret;

			if( info[i].p_ov )
			{
				IBSP_TRACE1( IBSP_DBG_IO,
					("Calling lpWPUCompleteOverlappedRequest: "
					"socket=%p, ov=%p OffsetHigh=%d "
					"InternalHigh=%d hEvent=%p\n",
					info[i].p_socket, info[i].p_ov, info[i].p_ov->OffsetHigh,
					info[i].p_ov->InternalHigh, info[i].p_ov->hEvent) );

				ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
					info[i].p_socket->switch_socket, info[i].p_ov,
					info[i].p_ov->OffsetHigh,
					(DWORD)info[i].p_ov->InternalHigh, &error );
				if( ret != 0 )
				{
					IBSP_ERROR( ("WPUCompleteOverlappedRequest for ov=%p "
						"returned %d err %d\n", info[i].p_ov, ret, error) );
				}
				deref_socket_info( info[i].p_socket );
			}
		}

		n_comp += i;

#ifdef _DEBUG_
		if( comp_count > g_ibsp.max_comp_count )
		{
			g_ibsp.max_comp_count = comp_count;
		}
#endif
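		/* A NULL free list means ib_poll_cq consumed every entry, so more
		 * completions may remain on the CQ; poll again until some free
		 * entries come back unused. */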
	} while( !free_wclist );

done:

#ifdef _DEBUG_
	fzprint(("%s():%d:0x%x:0x%x: overlap_h0_count=%d overlap_h1_count=%d\n",
			 __FUNCTION__,
			 __LINE__, GetCurrentProcessId(),
			 GetCurrentThreadId(), g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count));
#endif

	IBSP_EXIT( IBSP_DBG_WQ );
	return n_comp;
}


/* IB completion thread */
static DWORD WINAPI
ib_cq_thread(
					LPVOID						lpParameter )
{
	struct cq_thread_info	*cq_tinfo = (struct cq_thread_info *)lpParameter;
	cl_status_t				cl_status;
	ib_api_status_t			status;
	int						i;

	IBSP_ENTER( IBSP_DBG_HW );

	fzprint(("%s():%d:0x%x:0x%x: cq_tinfo=0x%p\n", __FUNCTION__,
			 __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo));

	do
	{
		cl_status = cl_waitobj_wait_on( cq_tinfo->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
		if( cl_status != CL_SUCCESS )
		{
			IBSP_ERROR(
				("cl_waitobj_wait_on() failed (%d)\n", cl_status) );
		}

		/*
		 * TODO: By rearranging thread creation and cq creation, this check
		 * may be eliminated.
		 */
		if( cq_tinfo->cq != NULL )
		{
			fzprint(("%s():%d:0x%x:0x%x: Calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[INTR_TOTAL] );
#endif
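			/* After a pass that reaps completions, keep polling for up to
			 * g_max_poll further passes before rearming, presumably to
			 * save the latency and overhead of another completion event. */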
			i = g_max_poll;
			do
			{
				if( ib_cq_comp( cq_tinfo ) )
					i = g_max_poll;

			} while( i-- );

			fzprint(("%s():%d:0x%x:0x%x: Done calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

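			/* Rearm so the wait object fires on the next completion; FALSE
			 * requests notification for any completion, not only solicited
			 * ones. */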
			status = ib_rearm_cq( cq_tinfo->cq, FALSE );
			if( status != IB_SUCCESS )
			{
				IBSP_ERROR(
					("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
			}
		}

	} while( !cq_tinfo->ib_cq_thread_exit_wanted );

	cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		IBSP_ERROR(
			("cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status)) );
	}
	HeapFree( g_ibsp.heap, 0, cq_tinfo );

	/* No special exit code, even on errors. */
	IBSP_EXIT( IBSP_DBG_HW );
	ExitThread( 0 );
}


/* Called with the HCA's CQ lock held. */
static struct cq_thread_info *
ib_alloc_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info *cq_tinfo = NULL;
	ib_cq_create_t cq_create;
	ib_api_status_t status;
	cl_status_t cl_status;

	IBSP_ENTER( IBSP_DBG_HW );

	cq_tinfo = HeapAlloc(
		g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );

	if( !cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("HeapAlloc() failed.\n") );
		return NULL;
	}

	cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		cq_tinfo->cq_waitobj = NULL;
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status)) );
		return NULL;
	}

	cq_tinfo->hca = hca;
	cq_tinfo->ib_cq_thread_exit_wanted = FALSE;

	cq_tinfo->ib_cq_thread = CreateThread( NULL, 0, ib_cq_thread, cq_tinfo, 0,
		(LPDWORD)&cq_tinfo->ib_cq_thread_id );

	if( cq_tinfo->ib_cq_thread == NULL )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( ("CreateThread failed (%d)", GetLastError()) );
		return NULL;
	}

	STAT_INC( thread_num );

	/* Completion queue */
	cq_create.size = IB_CQ_SIZE;

	cq_create.pfn_comp_cb = NULL;
	cq_create.h_wait_obj = cq_tinfo->cq_waitobj;

	status = ib_create_cq( hca->hca_handle, &cq_create, cq_tinfo,
		NULL, &cq_tinfo->cq );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_create_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	STAT_INC( cq_num );

	status = ib_rearm_cq( cq_tinfo->cq, FALSE );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	cq_tinfo->cqe_size = IB_CQ_SIZE;

	if( hca->cq_tinfo )
	{
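		/* Chain the new cq_tinfo into the HCA's existing ring of
		 * CQ/thread-info structures. */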
		__cl_primitive_insert(
			&hca->cq_tinfo->list_item, &cq_tinfo->list_item );
	}
	else
	{
		/* Setup the list entry to point to itself. */
		cq_tinfo->list_item.p_next = &cq_tinfo->list_item;
		cq_tinfo->list_item.p_prev = &cq_tinfo->list_item;
	}

	/* Upon allocation, the new CQ becomes the primary. */
	hca->cq_tinfo = cq_tinfo;

	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}


void
ib_destroy_cq_tinfo(
					struct cq_thread_info		*cq_tinfo )
{
	ib_wc_t wclist;
	ib_wc_t *free_wclist;
	ib_wc_t *done_wclist;
	ib_api_status_t status;
	HANDLE h_cq_thread;

	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( cq_tinfo );
	CL_ASSERT( cq_tinfo->qp_count == 0 );

	if( cq_tinfo->cq )
	{
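		/* Drain and discard anything still on the CQ; with qp_count at
		 * zero, no socket is waiting on these completions. */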
		wclist.p_next = NULL;
		free_wclist = &wclist;

		while( ib_poll_cq(
			cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )
		{
			IBSP_TRACE1( IBSP_DBG_WQ,
				("free=%p, done=%p\n", free_wclist, done_wclist) );
		}

		IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() start..\n") );

		/*
		 * Called from cleanup thread, okay to block.
		 */
		status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );
		if( status )
		{
			IBSP_ERROR(
				("ib_destroy_cq returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() finished.\n") );

			cq_tinfo->cq = NULL;

			STAT_DEC( cq_num );
		}
	}

	if( cq_tinfo->ib_cq_thread )
	{
		/* ib_cq_thread() will release the cq_tinfo before exit. Don't
		   reference cq_tinfo after signaling. */
		h_cq_thread = cq_tinfo->ib_cq_thread;
		cq_tinfo->ib_cq_thread = NULL;

		cq_tinfo->ib_cq_thread_exit_wanted = TRUE;
		cl_waitobj_signal( cq_tinfo->cq_waitobj );

		/* Wait for ib_cq_thread to die, if we are not running on it */
		if( GetCurrentThreadId() != cq_tinfo->ib_cq_thread_id )
		{
			fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cq_thread=0x%x to die\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
					 cq_tinfo->ib_cq_thread_id ));
			if( WaitForSingleObject( h_cq_thread, INFINITE ) != WAIT_OBJECT_0 )
			{
				IBSP_ERROR( ("WaitForSingleObject failed\n") );
			}
			else
			{
				STAT_DEC( thread_num );
			}
		}
		else
		{
			fzprint(("%s():%d:0x%x:0x%x: Currently on ib_cq_thread.\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
			STAT_DEC( thread_num );
		}
		CloseHandle( h_cq_thread );
	}
	else
	{
		/* There was no thread created, destroy cq_waitobj and
		   free memory */
		if( cq_tinfo->cq_waitobj )
		{
			cl_waitobj_destroy( cq_tinfo->cq_waitobj );
			cq_tinfo->cq_waitobj = NULL;
		}
		HeapFree( g_ibsp.heap, 0, cq_tinfo );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}


static struct cq_thread_info *
ib_acquire_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info	*cq_tinfo = NULL;
	uint32_t				cqe_size;
	ib_api_status_t			status;

	IBSP_ENTER( IBSP_DBG_HW );

	cl_spinlock_acquire( &hca->cq_lock );

	if( !hca->cq_tinfo )
	{
		cq_tinfo = ib_alloc_cq_tinfo( hca );
		if( !cq_tinfo )
		{
			IBSP_ERROR_EXIT( ("ib_alloc_cq_tinfo() failed\n") );
			cl_spinlock_release( &hca->cq_lock );
			return (NULL);
		}
	}
	else
	{
		cq_tinfo = hca->cq_tinfo;
	}

	CL_ASSERT( cq_tinfo != NULL );

	cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;

	if( cq_tinfo->cqe_size < cqe_size )
	{
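		/* Try to grow the CQ so it can absorb the work requests of one
		 * more QP. HCAs that don't implement CQ resize fail this call
		 * (typically with IB_UNSUPPORTED); fall back to allocating a
		 * brand-new CQ and completion thread below. */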
		status = ib_modify_cq( cq_tinfo->cq, &cqe_size );
		switch( status )
		{
		case IB_INVALID_CQ_SIZE:
		case IB_UNSUPPORTED:
			cq_tinfo = ib_alloc_cq_tinfo( hca );
			if( !cq_tinfo )
				break;

			cq_tinfo->qp_count++;
			break;

		case IB_SUCCESS:
			cq_tinfo->cqe_size = cqe_size;

			cq_tinfo->qp_count++;

			fzprint(("%s():%d:0x%x:0x%x: New cq size=%d.\n",
					 __FUNCTION__,
					 __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), cq_tinfo->cqe_size));
			break;

		default:
			IBSP_ERROR_EXIT(
				("ib_modify_cq() returned %s\n", ib_get_err_str(status)) );
			cq_tinfo = NULL;
		}
	}
	else
	{
		cq_tinfo->qp_count++;
	}

	cl_spinlock_release( &hca->cq_lock );
	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}

void
ib_release_cq_tinfo(
					struct cq_thread_info		*cq_tinfo )
{
	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( cq_tinfo );
	CL_ASSERT( cq_tinfo->hca );

	cl_spinlock_acquire( &cq_tinfo->hca->cq_lock );
	/* If this CQ now has fewer QPs than the primary, make it the primary. */
	if( --cq_tinfo->qp_count < cq_tinfo->hca->cq_tinfo->qp_count )
		cq_tinfo->hca->cq_tinfo = cq_tinfo;
	cl_spinlock_release( &cq_tinfo->hca->cq_lock );

	IBSP_EXIT( IBSP_DBG_HW );
}

/* Release IB resources. */
void
ib_release(void)
{
	cl_fmap_item_t			*p_item;

	IBSP_ENTER( IBSP_DBG_HW );

	if( g_ibsp.al_handle )
	{
		cl_list_item_t *item;
		ib_api_status_t status;

		unregister_pnp();

		while( (item = cl_qlist_head( &g_ibsp.hca_list )) != cl_qlist_end( &g_ibsp.hca_list ) )
		{
			struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);

			pnp_ca_remove( hca );
		}

		fzprint(("%s():%d:0x%x:0x%x: Calling ib_close_al...\n", __FUNCTION__,
				 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

		status = ib_close_al( g_ibsp.al_handle );

		fzprint(("%s():%d:0x%x:0x%x: Done calling ib_close_al, status=%d.\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 status));
		if( status != IB_SUCCESS )
		{
			IBSP_ERROR(
				("ib_close_al returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_TRACE( IBSP_DBG_HW, ("ib_close_al success\n") );
			STAT_DEC( al_num );
		}
		g_ibsp.al_handle = NULL;
	}

	for( p_item = cl_fmap_head( &g_ibsp.ip_map );
		p_item != cl_fmap_end( &g_ibsp.ip_map );
		p_item = cl_fmap_head( &g_ibsp.ip_map ) )
	{
		cl_fmap_remove_item( &g_ibsp.ip_map, p_item );

		HeapFree( g_ibsp.heap, 0,
			PARENT_STRUCT(p_item, struct ibsp_ip_addr, item) );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}


/* Initialize IB resources. */
int
ibsp_initialize(void)
{
	ib_api_status_t status;
	int ret;

	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( g_ibsp.al_handle == NULL );
	CL_ASSERT( cl_qlist_count( &g_ibsp.hca_list ) == 0 );

	/* Open the IB library */
	status = ib_open_al( &g_ibsp.al_handle );

	IBSP_TRACE( IBSP_DBG_HW, ("open is %d %p\n", status, g_ibsp.al_handle) );

	if( status != IB_SUCCESS )
	{
		IBSP_ERROR( ("ib_open_al failed (%d)\n", status) );
		ret = WSAEPROVIDERFAILEDINIT;
		goto done;
	}

	STAT_INC( al_num );

	/* Register for PNP events */
	status = register_pnp();
	if( status )
	{
		IBSP_ERROR( ("register_pnp failed (%d)\n", status) );
		ret = WSAEPROVIDERFAILEDINIT;
		goto done;
	}

	STAT_INC( thread_num );

	ret = 0;
done:
	if( ret )
	{
		/* Free up resources. */
		ib_release();
	}

	IBSP_EXIT( IBSP_DBG_HW );

	return ret;
}


/* Destroys the InfiniBand resources of a socket. */
void
ib_destroy_socket(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	ib_api_status_t status;

	IBSP_ENTER( IBSP_DBG_EP );

	if( socket_info->qp )
	{
		cl_atomic_inc( &socket_info->ref_cnt );
		status = ib_destroy_qp( socket_info->qp, deref_socket_info );
		if( status != IB_SUCCESS )
		{
			IBSP_ERROR( ("ib_destroy_qp returned %s\n",
				ib_get_err_str( status )) );
			deref_socket_info( socket_info );
		}

		ib_release_cq_tinfo( socket_info->cq_tinfo );

		socket_info->qp = NULL;
	}

	IBSP_EXIT( IBSP_DBG_EP );
}


/*
 * Creates the necessary IB resources for a socket.
 */
int
ib_create_socket(
	IN	OUT			struct ibsp_socket_info		*socket_info)
{
	ib_qp_create_t			qp_create;
	ib_api_status_t			status;
	ib_qp_attr_t			qp_attr;

	IBSP_ENTER( IBSP_DBG_EP );

	CL_ASSERT( socket_info != NULL );
	CL_ASSERT( socket_info->port != NULL );
	CL_ASSERT( socket_info->qp == NULL );

	socket_info->hca_pd = socket_info->port->hca->pd;

	/* Get the completion queue and thread info for this socket */
	socket_info->cq_tinfo = ib_acquire_cq_tinfo( socket_info->port->hca );
	if( !socket_info->cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("ib_acquire_cq_tinfo failed\n") );
		return WSAENOBUFS;
	}

	/* Queue pair */
	qp_create.qp_type = IB_QPT_RELIABLE_CONN;
	qp_create.sq_depth = QP_ATTRIB_SQ_DEPTH;
	qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;
	qp_create.sq_sge = QP_ATTRIB_SQ_SGE;
	qp_create.rq_sge = 1;
	qp_create.h_rq_cq = socket_info->cq_tinfo->cq;
	qp_create.h_sq_cq = socket_info->cq_tinfo->cq;
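	/* Request a completion for every send WR; complete_wq() relies on this
	 * to retire each WR and its overlapped structure. */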
	qp_create.sq_signaled = TRUE;

	status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,	/* context */
		NULL,	/* async handler */
		&socket_info->qp );
	if( status )
	{
		ib_release_cq_tinfo( socket_info->cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_create_qp returned %s\n", ib_get_err_str( status )) );
		return WSAENOBUFS;
	}

	status = ib_query_qp( socket_info->qp, &qp_attr );
	if( status == IB_SUCCESS )
	{
		socket_info->max_inline = min( g_max_inline, qp_attr.sq_max_inline );
	}
	else
	{
		IBSP_ERROR( ("ib_query_qp returned %s\n", ib_get_err_str( status )) );
		socket_info->max_inline = 0;
	}

	STAT_INC( qp_num );

	IBSP_EXIT( IBSP_DBG_EP );
	return 0;
}


void
wait_cq_drain(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	IBSP_ENTER( IBSP_DBG_EP );

	if( socket_info->cq_tinfo == NULL )
	{
		IBSP_EXIT( IBSP_DBG_EP );
		return;
	}

	/* Wait for the QP to be drained. */
	while( socket_info->send_cnt || socket_info->recv_cnt )
	{
		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr_list_count=%d qp state=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 socket_info, cl_qlist_count(&socket_info->wr_list)));

		Sleep(100);
	}

	IBSP_EXIT( IBSP_DBG_EP );
}


void
ibsp_dup_overlap_abort(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	LPWSAOVERLAPPED lpOverlapped = NULL;
	int error;
	int ret;
	uint8_t					idx;

	IBSP_ENTER( IBSP_DBG_EP );
	CL_ASSERT( !socket_info->send_cnt && !socket_info->recv_cnt );

	/* Browse the list of all posted overlapped structures
	 * to mark them as aborted. */
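	/* dup_idx points one slot past the newest duplicate WR, so step back
	 * dup_cnt slots. The uint8_t subtraction may wrap, so fold the index
	 * back into [0, QP_ATTRIB_RQ_DEPTH): e.g. with a depth of 8, dup_idx 2
	 * and dup_cnt 5, the subtraction wraps to 253, and adding 8 overflows
	 * back to 5 == (2 - 5) mod 8. */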
	idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;
	if( idx >= QP_ATTRIB_RQ_DEPTH )
		idx += QP_ATTRIB_RQ_DEPTH;

	while( socket_info->dup_cnt )
	{
		lpOverlapped = socket_info->dup_wr[idx].wr.lpOverlapped;

		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p Internal=%d InternalHigh=%d hEvent=%d\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), socket_info, &socket_info->dup_wr[idx], lpOverlapped, lpOverlapped->Internal, lpOverlapped->InternalHigh, lpOverlapped->hEvent));

		lpOverlapped->OffsetHigh = WSAECONNABORTED;
		lpOverlapped->InternalHigh = 0;

		if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
		{
			/* Indicate this operation is complete. The switch will poll
			 * with calls to WSPGetOverlappedResult(). */
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h1_comp_count);

			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

			CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
					 ("%s: set internal overlapped=0x%p Internal=%d OffsetHigh=%d\n",
					  __FUNCTION__, lpOverlapped, lpOverlapped->Internal,
					  lpOverlapped->OffsetHigh));

			lpOverlapped->Internal = 0;
		}
		else
		{
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h0_count);


			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif
			CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
					 ("%s: calls lpWPUCompleteOverlappedRequest, overlapped=0x%p OffsetHigh=%d InternalHigh=%d hEvent=%d\n",
					  __FUNCTION__, lpOverlapped, lpOverlapped->OffsetHigh,
					  lpOverlapped->InternalHigh, lpOverlapped->hEvent));

			ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest
				(socket_info->switch_socket,
				 lpOverlapped,
				 lpOverlapped->OffsetHigh, (DWORD) lpOverlapped->InternalHigh, &error);

			if( ret != 0 )
			{
				CL_ERROR(IBSP_DBG_EP, gdbg_lvl,
						 ("lpWPUCompleteOverlappedRequest failed with %d/%d\n", ret,
						  error));
			}
		}
		/* Advance to the next duplicate WR slot. */
		if( ++idx == QP_ATTRIB_RQ_DEPTH )
			idx = 0;

		cl_atomic_dec( &socket_info->dup_cnt );
	}

	IBSP_EXIT( IBSP_DBG_EP );
}


/* Closes a connection and releases its resources. */
void
shutdown_and_destroy_socket_info(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	enum ibsp_socket_state	old_state;

	IBSP_ENTER( IBSP_DBG_EP );

	cl_spinlock_acquire( &socket_info->mutex );
	old_state = socket_info->socket_state;
	IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CLOSED );
	cl_spinlock_release( &socket_info->mutex );

	if( socket_info->listen.handle )
	{
		/* Stop listening and reject queued connections. */
		ib_listen_cancel( socket_info );
	}

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	cl_qlist_remove_item( &g_ibsp.socket_info_list, &socket_info->item );

	switch( old_state )
	{
	case IBSP_CREATE:
	case IBSP_LISTEN:
		/* Nothing to do. */
		break;

	case IBSP_CONNECTED:
		{
			struct disconnect_reason reason;

			memset( &reason, 0, sizeof(reason) );
			reason.type = DISC_SHUTDOWN;
			ib_disconnect( socket_info, &reason );
		}
		/* Fall through. */

	case IBSP_CONNECT:
	case IBSP_DISCONNECTED:
		/* We changed the state - remove from connection map. */
		CL_ASSERT( socket_info->conn_item.p_map );
		cl_rbmap_remove_item( &g_ibsp.conn_map, &socket_info->conn_item );
		break;
	}
	cl_spinlock_release( &g_ibsp.socket_info_mutex );

	/* Flush all completions. */
	if( socket_info->dup_cnt )
		ibsp_dup_overlap_abort( socket_info );

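	/* Reap any remaining send/receive completions inline rather than
	 * waiting on the CQ thread; each ib_cq_comp() pass retires WRs and
	 * decrements send_cnt/recv_cnt via complete_wq(). */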
	while( socket_info->send_cnt || socket_info->recv_cnt )
		ib_cq_comp( socket_info->cq_tinfo );

	ibsp_dereg_socket( socket_info );

	ib_destroy_socket( socket_info );

#ifdef IBSP_LOGGING
	DataLogger_Shutdown(&socket_info->SendDataLogger);
	DataLogger_Shutdown(&socket_info->RecvDataLogger);
#endif

	/* Release the initial reference and clean up. */
	deref_socket_info( socket_info );

	IBSP_EXIT( IBSP_DBG_EP );
}


boolean_t
ibsp_conn_insert(
	IN				struct ibsp_socket_info		*s )
{
	struct ibsp_socket_info		*p_sock;
	cl_rbmap_item_t				*p_item, *p_insert_at;
	boolean_t					left = TRUE;

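	/* Descend the red-black map of connections, which is ordered
	 * lexicographically on (local family, local address, local port, peer
	 * family, peer address, peer port). Finding an exact match means an
	 * identical connection already exists, so the insert is rejected. */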
	p_item = cl_rbmap_root( &g_ibsp.conn_map );
	p_insert_at = p_item;

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( !s->conn_item.p_map );
	while( p_item != cl_rbmap_end( &g_ibsp.conn_map ) )
	{
		p_insert_at = p_item;
		p_sock = PARENT_STRUCT( p_item, struct ibsp_socket_info, conn_item );
		if( p_sock->local_addr.sin_family < s->local_addr.sin_family )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_family > s->local_addr.sin_family )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->local_addr.sin_addr.S_un.S_addr < s->local_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_addr.S_un.S_addr > s->local_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->local_addr.sin_port < s->local_addr.sin_port )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_port > s->local_addr.sin_port )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_family < s->peer_addr.sin_family )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_family > s->peer_addr.sin_family )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_addr.S_un.S_addr < s->peer_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_addr.S_un.S_addr > s->peer_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_port < s->peer_addr.sin_port )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_port > s->peer_addr.sin_port )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else
			goto done;
	}

	cl_rbmap_insert( &g_ibsp.conn_map, p_insert_at, &s->conn_item, left );

done:
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
	return p_item == cl_rbmap_end( &g_ibsp.conn_map );
}


void
ibsp_conn_remove(
	IN				struct ibsp_socket_info		*s )
{
	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( s->conn_item.p_map );
	cl_rbmap_remove_item( &g_ibsp.conn_map, &s->conn_item );
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
}