/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include "ibspdll.h"


static void ib_destroy_cq_tinfo( struct cq_thread_info *cq_tinfo );

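/* Per-completion info captured while walking the CQ: the switch socket,
 * the overlapped request to hand back (NULL when the switch will poll
 * instead), and the send/recv counter to decrement once any upcall is
 * done. */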
typedef struct _io_comp_info
{
        SOCKET                                  socket;
        LPWSAOVERLAPPED                 p_ov;
        atomic32_t                              *p_io_cnt;

} io_comp_info_t;


/* Work queue entry completion routine. */
static void
complete_wq(
        IN              const   ib_wc_t                                         *wc,
                OUT                     io_comp_info_t                          *p_io_info )
{
        struct _wr                              *wr = NULL;
        struct _recv_wr                 *p_recv_wr = NULL;
        LPWSAOVERLAPPED                 lpOverlapped = NULL;
        struct ibsp_socket_info *socket_info = NULL;

        IBSP_ENTER( IBSP_DBG_IO );

        wr = (struct _wr * __ptr64)wc->wr_id;
        p_recv_wr = (struct _recv_wr * __ptr64)wc->wr_id;

        CL_ASSERT( wr );

        socket_info = wr->socket_info;
        p_io_info->socket = socket_info->switch_socket;

        lpOverlapped = wr->lpOverlapped;

        IBSP_TRACE4( IBSP_DBG_IO,
                ("socket %p, ov %p, work completion status=%s, wc_type=%s\n",
                socket_info, lpOverlapped, ib_get_wc_status_str( wc->status ),
                ib_get_wc_type_str( wc->wc_type )) );

        /* Set the Windows error code. There is no clean mapping from the
         * IBAL error codes to Windows error codes, but it probably does
         * not matter, as long as an error is returned. */
        switch( wc->status )
        {
        case IB_WCS_SUCCESS:
                /*
                 * Set the length of the operation. Under InfiniBand, the work
                 * completion length is only valid for a receive operation.
                 * Fortunately, the length was already set when the send was
                 * posted.
                 *
                 * lpWPUCompleteOverlappedRequest is supposed to store the length
                 * into InternalHigh; however, it will not be called if the low
                 * order bit of lpOverlapped->hEvent is set. So we do it here and
                 * hope for the best.
                 *
                 * NOTE: Without a valid length, the switch doesn't seem to call
                 * GetOverlappedResult() even if we call lpWPUCompleteOverlappedRequest().
                 */
                if( wc->wc_type == IB_WC_RECV )
                        lpOverlapped->InternalHigh = wc->length;

                lpOverlapped->OffsetHigh = 0;
                break;

        case IB_WCS_WR_FLUSHED_ERR:
                cl_spinlock_acquire( &socket_info->mutex );

                if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE &&
                        wc->wc_type == IB_WC_RECV )
                {
                        /*
                         * Take the wr off the wr_list, and place onto the
                         * dup_wr_list.  We will post them later on the new QP.
                         */
                        cl_spinlock_acquire( &socket_info->recv_lock );

                        /* Copy to the duplicate WR array. */
                        socket_info->dup_wr[socket_info->dup_idx] = *p_recv_wr;

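                        /* Advance the ring index.  When the depth is a power of
                         * two the wrap is a simple mask; otherwise fall back to
                         * a compare-and-reset. */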
#if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \
        QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \
        QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8
                        socket_info->dup_idx++;
                        socket_info->dup_idx &= (QP_ATTRIB_RQ_DEPTH - 1);
#else
                        if( ++socket_info->dup_idx == QP_ATTRIB_RQ_DEPTH )
                                socket_info->dup_idx = 0;
#endif

                        cl_atomic_inc( &socket_info->dup_cnt );
                        /* ib_cq_comp will decrement the receive count. */
                        p_io_info->p_io_cnt = &socket_info->recv_cnt;

                        cl_spinlock_release( &socket_info->recv_lock );

                        cl_spinlock_release( &socket_info->mutex );
                        IBSP_EXIT( IBSP_DBG_IO );
                        return;
                }

                /* Check whether the receive buffers are being flushed on
                 * purpose (socket duplication). */
                if( socket_info->socket_state == IBSP_DUPLICATING_OLD )
                        wr->lpOverlapped->OffsetHigh = 0;
                else
                        wr->lpOverlapped->OffsetHigh = WSA_OPERATION_ABORTED;

                cl_spinlock_release( &socket_info->mutex );

                /* Override the length, as per the WSD specs. */
                wr->lpOverlapped->InternalHigh = 0;
                break;
        case IB_WCS_LOCAL_LEN_ERR:
        case IB_WCS_LOCAL_OP_ERR:
        case IB_WCS_LOCAL_PROTECTION_ERR:
        case IB_WCS_MEM_WINDOW_BIND_ERR:
        case IB_WCS_REM_ACCESS_ERR:
        case IB_WCS_REM_OP_ERR:
        case IB_WCS_RNR_RETRY_ERR:
        case IB_WCS_TIMEOUT_RETRY_ERR:
        case IB_WCS_REM_INVALID_REQ_ERR:
        default:
                IBSP_ERROR( ("%s error: %s\n",
                        ib_get_wc_type_str( wc->wc_type ),
                        ib_get_wc_status_str( wc->status )) );
                lpOverlapped->OffsetHigh = WSAECONNABORTED;
                wr->lpOverlapped->InternalHigh = 0;
                socket_info->qp_error = WSAECONNABORTED;
                break;
        }

#ifdef _DEBUG_
        if( wc->wc_type == IB_WC_RECV )
        {
                // This code requires the recv count to be decremented here, but it needs
                // to be decremented after any callbacks are invoked so socket destruction
                // gets delayed until all callbacks have been invoked.
                //{
                //      uint8_t idx;

                //      cl_spinlock_acquire( &socket_info->recv_lock );
                //      idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;
                //      if( idx >= QP_ATTRIB_RQ_DEPTH )
                //              idx += QP_ATTRIB_RQ_DEPTH;

                //      CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->recv_wr[idx] );
                //      cl_atomic_dec( &socket_info->recv_cnt );
                //      cl_spinlock_release( &socket_info->recv_lock );
                //}

                if( wc->status == IB_WCS_SUCCESS && p_recv_wr->ds_array[0].length >= 40 )
                {
                        debug_dump_buffer( IBSP_DBG_WQ | IBSP_DBG_LEVEL4, "RECV",
                                (void * __ptr64)p_recv_wr->ds_array[0].vaddr, 40 );
                }

                cl_atomic_dec( &g_ibsp.recv_count );
                cl_atomic_inc( &socket_info->recv_comp );

                memset( p_recv_wr, 0x33, sizeof(struct _recv_wr) );
        }
        else
        {
                // This code requires the send count to be decremented here, but it needs
                // to be decremented after any callbacks are invoked so socket destruction
                // gets delayed until all callbacks have been invoked.
                //{
                //      uint8_t idx;

                //      cl_spinlock_acquire( &socket_info->send_lock );
                //      idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;
                //      if( idx >= QP_ATTRIB_SQ_DEPTH )
                //              idx += QP_ATTRIB_SQ_DEPTH;
                //      CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->send_wr[idx] );
                //      cl_atomic_dec( &socket_info->send_cnt );
                //      cl_spinlock_release( &socket_info->send_lock );
                //}

                if( wc->wc_type == IB_WC_SEND )
                {
                        cl_atomic_dec( &g_ibsp.send_count );
                        cl_atomic_inc( &socket_info->send_comp );

                        fzprint(("%s():%d:0x%x:0x%x: send_count=%d\n",
                                __FUNCTION__,
                                __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), g_ibsp.send_count));
                }

                memset( wr, 0x33, sizeof(struct _wr) );
        }
#endif

        IBSP_TRACE4( IBSP_DBG_IO,
                ("overlapped=%p, InternalHigh=%d, hEvent=%x\n",
                lpOverlapped, lpOverlapped->InternalHigh,
                (uintptr_t) lpOverlapped->hEvent) );

        /* Don't notify the switch of this completion if:
         *   - the switch doesn't want a notification,
         *   - the WQ entry completed successfully,
         *   - the socket is still connected.
         */
        if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
        {
                /* Indicate this operation is complete. The switch will poll
                 * with calls to WSPGetOverlappedResult(). */

#ifdef _DEBUG_
                cl_atomic_dec( &g_ibsp.overlap_h1_comp_count );

                fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
                                 __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                 GetCurrentThreadId(), lpOverlapped,
                                 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
                                 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

                IBSP_TRACE1( IBSP_DBG_IO,
                        ("Not calling lpWPUCompleteOverlappedRequest: "
                        "socket=%p, ov=%p OffsetHigh=%d, InternalHigh=%d hEvent=%p\n",
                        socket_info, lpOverlapped, lpOverlapped->OffsetHigh,
                        lpOverlapped->InternalHigh, lpOverlapped->hEvent) );

                lpOverlapped->Internal = 0;
                p_io_info->p_ov = NULL;
        }
        else
        {
#ifdef _DEBUG_
                cl_atomic_dec( &g_ibsp.overlap_h0_count );

                fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
                                 __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                 GetCurrentThreadId(), lpOverlapped,
                                 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
                                 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

                IBSP_TRACE1( IBSP_DBG_IO,
                        ("Calling lpWPUCompleteOverlappedRequest: "
                        "socket=%p, ov=%p OffsetHigh=%d InternalHigh=%d hEvent=%p\n",
                        socket_info, lpOverlapped, lpOverlapped->OffsetHigh,
                        lpOverlapped->InternalHigh, lpOverlapped->hEvent) );

                p_io_info->p_ov = lpOverlapped;
        }

        if( wc->wc_type == IB_WC_RECV )
                p_io_info->p_io_cnt = &socket_info->recv_cnt;
        else
                p_io_info->p_io_cnt = &socket_info->send_cnt;

        IBSP_EXIT( IBSP_DBG_IO );
}


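/* Completions are first drained into a local io_comp_info_t array and the
 * switch upcalls are made only afterwards: lpWPUCompleteOverlappedRequest
 * can re-enter the provider (the switch may close the socket during the
 * call; see the IB_INVALID_CQ_HANDLE case below). */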
/* CQ completion handler. */
void
ib_cq_comp(
                                        void                                            *cq_context )
{
        struct cq_thread_info   *cq_tinfo = cq_context;
        ib_api_status_t                 status;
        ib_wc_t                                 wclist[WC_LIST_SIZE];
        ib_wc_t                                 *free_wclist;
        ib_wc_t                                 *done_wclist;
        io_comp_info_t                  info[WC_LIST_SIZE];
        int                                             cb_idx;
        int                                             i;
#ifdef _DEBUG_
        int                                             comp_count;
#endif

        CL_ENTER( IBSP_DBG_WQ, gdbg_lvl );

        CL_ASSERT( WC_LIST_SIZE >= 1 );

        do
        {
                /* Try to retrieve up to WC_LIST_SIZE completions at a time. */
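                /* ib_poll_cq takes a linked list of free work completion
                 * structures and moves the completed ones onto done_wclist,
                 * so chain the array into a list first. */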
                for( i = 0; i < (WC_LIST_SIZE - 1); i++ )
                {
                        wclist[i].p_next = &wclist[i + 1];
                }
                wclist[(WC_LIST_SIZE - 1)].p_next = NULL;

                free_wclist = &wclist[0];
                done_wclist = NULL;

                status = ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist );

                CL_TRACE( IBSP_DBG_WQ, gdbg_lvl,
                        ("%s():%d:0x%x:0x%x: poll CQ got status %d, free=%p, done=%p\n",
                        __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
                        status, free_wclist, done_wclist) );

                switch( status )
                {
                case IB_NOT_FOUND:
                case IB_SUCCESS:
                        break;

                case IB_INVALID_CQ_HANDLE:
                        /* This happens when the switch closes the socket while the
                         * execution thread was calling lpWPUCompleteOverlappedRequest. */
                        CL_ERROR( IBSP_DBG_WQ, gdbg_lvl,
                                ("ib_poll_cq returned IB_INVALID_CQ_HANDLE\n") );
                        goto done;

                default:
                        CL_ERROR( IBSP_DBG_WQ, gdbg_lvl,
                                ("ib_poll_cq failed, returned %s\n", ib_get_err_str( status )) );
                        break;
                }

#ifdef _DEBUG_
                comp_count = 0;
#endif

                /* We have some completions. */
                cb_idx = 0;
                while( done_wclist )
                {
#ifdef _DEBUG_
                        comp_count++;
#endif
                        complete_wq( done_wclist, &info[cb_idx++] );

                        done_wclist = done_wclist->p_next;
                }

                while( cb_idx-- )
                {
                        int error;
                        int ret;

                        if( info[cb_idx].p_ov )
                        {
                                ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
                                        info[cb_idx].socket, info[cb_idx].p_ov,
                                        info[cb_idx].p_ov->OffsetHigh,
                                        (DWORD)info[cb_idx].p_ov->InternalHigh, &error );
                                if( ret != 0 )
                                {
                                        IBSP_ERROR( ("WPUCompleteOverlappedRequest for ov=%p "
                                                "returned %d err %d\n", info[cb_idx].p_ov, ret, error) );
                                }
                        }

                        cl_atomic_dec( info[cb_idx].p_io_cnt );
                }

#ifdef _DEBUG_
                if( comp_count > g_ibsp.max_comp_count )
                {
                        g_ibsp.max_comp_count = comp_count;
                }
#endif
        } while( !free_wclist );
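
        /* free_wclist is NULL only when ib_poll_cq consumed every entry,
         * meaning the CQ may still hold more completions; poll again until
         * some entries come back unused.  Rearming with FALSE requests
         * notification for any new completion, not only solicited ones. */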
        status = ib_rearm_cq( cq_tinfo->cq, FALSE );
        if( status != IB_SUCCESS )
        {
                CL_ERROR( IBSP_DBG_WQ, gdbg_lvl,
                        ("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
        }

done:

#ifdef _DEBUG_
        fzprint(("%s():%d:0x%x:0x%x: overlap_h0_count=%d overlap_h1_count=%d\n",
                         __FUNCTION__,
                         __LINE__, GetCurrentProcessId(),
                         GetCurrentThreadId(), g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count));
#endif

        CL_EXIT( IBSP_DBG_WQ, gdbg_lvl );
}


/* IB completion thread */
static DWORD WINAPI
ib_cq_thread(
                                        LPVOID                                          lpParameter )
{
        struct cq_thread_info *cq_tinfo = (struct cq_thread_info *)lpParameter;
        cl_status_t cl_status;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );


        fzprint(("%s():%d:0x%x:0x%x: cq_tinfo=0x%p\n", __FUNCTION__,
                         __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo));

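        /* Run until asked to exit and the done_wr_list has been drained. */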
        do
        {
                cl_status = cl_waitobj_wait_on( cq_tinfo->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
                if( cl_status != CL_SUCCESS )
                {
                        CL_ERROR( IBSP_DBG_EP, gdbg_lvl,
                                ("cl_waitobj_wait_on() (%d)\n", cl_status) );
                }

                /*
                 * TODO: By rearranging thread creation and CQ creation, this
                 * check may be eliminated.
                 */
                if( cq_tinfo->cq != NULL )
                {
                        fzprint(("%s():%d:0x%x:0x%x: Calling ib_cq_comp().\n", __FUNCTION__,
                                         __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

                        ib_cq_comp( cq_tinfo );
                        fzprint(("%s():%d:0x%x:0x%x: Done calling ib_cq_comp().\n", __FUNCTION__,
                                         __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
                }

        } while( (cq_tinfo->ib_cq_thread_exit_wanted != TRUE) ||
                        cl_qlist_count( &cq_tinfo->done_wr_list ) );

        cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );
        if( cl_status != CL_SUCCESS )
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("cl_waitobj_destroy() (%d)\n", cl_status) );
        }
        HeapFree( g_ibsp.heap, 0, cq_tinfo );

        /* No special exit code, even on errors. */
        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );
        ExitThread( 0 );
}


static struct cq_thread_info *
ib_alloc_cq_tinfo(
                                        struct ibsp_hca                         *hca )
{
        struct cq_thread_info *cq_tinfo = NULL;
        ib_cq_create_t cq_create;
        ib_api_status_t status;
        cl_status_t cl_status;
        int error;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        cq_tinfo = HeapAlloc( g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );

        if( cq_tinfo == NULL )
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("HeapAlloc() Failed.\n") );
                error = TRUE;
                goto done;
        }

        cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );
        if( cl_status != CL_SUCCESS )
        {
                cq_tinfo->cq_waitobj = NULL;
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("cl_waitobj_create() (%d)\n", cl_status) );
                error = TRUE;
                goto done;
        }

        cq_tinfo->hca = hca;
        cq_tinfo->ib_cq_thread_exit_wanted = FALSE;

        /* Create the CQ completion thread. */
        cq_tinfo->ib_cq_thread = CreateThread( NULL, 0, ib_cq_thread,
                cq_tinfo, 0, (LPDWORD)&cq_tinfo->ib_cq_thread_id );

        if( cq_tinfo->ib_cq_thread == NULL )
        {
                CL_ERROR( IBSP_DBG_HW, gdbg_lvl, ("CreateThread failed.") );
                error = TRUE;
                goto done;
        }

        STAT_INC( thread_num );

        /* Completion queue */
        cq_create.size = IB_CQ_SIZE;

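        /* With pfn_comp_cb NULL and h_wait_obj set, AL signals the wait
         * object when the CQ needs service instead of invoking a callback;
         * ib_cq_thread() blocks on that object. */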
        cq_create.pfn_comp_cb = NULL;
        cq_create.h_wait_obj = cq_tinfo->cq_waitobj;

        status = ib_create_cq( hca->hca_handle, &cq_create, cq_tinfo,   /* context */
                NULL,   /* async handler */
                &cq_tinfo->cq );
        if( status )
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("ib_create_cq failed (%d)\n", status) );
                error = TRUE;
                goto done;
        }

        STAT_INC( cq_num );

        status = ib_rearm_cq( cq_tinfo->cq, FALSE );
        if( status )
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("ib_rearm_cq failed (%d)\n", status) );
                error = TRUE;
                goto done;
        }

        cl_spinlock_init( &cq_tinfo->wr_mutex );
        cl_qlist_init( &cq_tinfo->done_wr_list );
        cq_tinfo->cqe_size = IB_CQ_SIZE;

        /* Only one CQ per HCA now */
        hca->cq_tinfo = cq_tinfo;

        error = FALSE;

done:
        if( error == TRUE )
        {
                ib_destroy_cq_tinfo( cq_tinfo );
                cq_tinfo = NULL;
        }

        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );

        return (cq_tinfo);
}


static void
ib_destroy_cq_tinfo(
                                        struct cq_thread_info           *cq_tinfo )
{
        ib_wc_t wclist;
        ib_wc_t *free_wclist;
        ib_wc_t *done_wclist;
        ib_api_status_t status;
        HANDLE h_cq_thread;
        DWORD cq_thread_id;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        if( cq_tinfo == NULL )
        {
                return;
        }

        if( cq_tinfo->cq )
        {
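                /* Discard any completions still queued on the CQ before
                 * destroying it. */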
                wclist.p_next = NULL;
                free_wclist = &wclist;

                while( ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )
                {
                        CL_TRACE( IBSP_DBG_WQ, gdbg_lvl, ("%s():%d:0x%x:0x%x: free=%p, done=%p\n",
                                __FUNCTION__,
                                __LINE__, GetCurrentProcessId(),
                                GetCurrentThreadId(),
                                free_wclist, done_wclist) );
                }

                CL_TRACE( IBSP_DBG_WQ, gdbg_lvl, ("%s():%d:0x%x:0x%x: ib_destroy_cq() start..\n",
                        __FUNCTION__,
                        __LINE__, GetCurrentProcessId(),
                        GetCurrentThreadId()) );

                /*
                 * Called from cleanup thread, okay to block.
                 */
                status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );
                if( status )
                {
                        CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("ib_destroy_cq failed (%d)\n", status) );
                }
                else
                {
                        CL_TRACE( IBSP_DBG_WQ, gdbg_lvl,
                                ("%s():%d:0x%x:0x%x: ib_destroy_cq() finished.\n", __FUNCTION__,
                                __LINE__, GetCurrentProcessId(), GetCurrentThreadId()) );

                        cq_tinfo->cq = NULL;

                        STAT_DEC( cq_num );
                }
        }

        /* Currently only 1 CQ per HCA */
        cq_tinfo->hca = NULL;

        if( cq_tinfo->ib_cq_thread )
        {
                /* ib_cq_thread() frees cq_tinfo before it exits, so capture
                   everything we need and don't reference cq_tinfo after
                   signaling. */
                h_cq_thread = cq_tinfo->ib_cq_thread;
                cq_thread_id = cq_tinfo->ib_cq_thread_id;
                cq_tinfo->ib_cq_thread = NULL;

                cq_tinfo->ib_cq_thread_exit_wanted = TRUE;
                cl_waitobj_signal( cq_tinfo->cq_waitobj );

                /* Wait for ib_cq_thread to die, if we are not running on it */
                if( GetCurrentThreadId() != cq_thread_id )
                {
                        fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cq_thread=0x%x to die\n",
                                         __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
                                         cq_thread_id ));
                        if( WaitForSingleObject( h_cq_thread, INFINITE ) != WAIT_OBJECT_0 )
                        {
                                CL_ERROR( IBSP_DBG_CM, gdbg_lvl, ("WaitForSingleObject failed\n") );
                        }
                        else
                        {
                                STAT_DEC( thread_num );
                        }
                }
                else
                {
                        fzprint(("%s():%d:0x%x:0x%x: Currently on ib_cq_thread.\n", __FUNCTION__,
                                         __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
                        STAT_DEC( thread_num );
                }
                CloseHandle( h_cq_thread );
        }
        else
        {
                /* There was no thread created, destroy cq_waitobj and
                   free memory */
                if( cq_tinfo->cq_waitobj )
                {
                        cl_waitobj_destroy( cq_tinfo->cq_waitobj );
                        cq_tinfo->cq_waitobj = NULL;
                }
                HeapFree( g_ibsp.heap, 0, cq_tinfo );
        }

        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );
}


static struct cq_thread_info *
ib_acquire_cq_tinfo(
                                        struct ibsp_hca                         *hca )
{
        struct cq_thread_info *cq_tinfo = NULL;
        uint32_t current_cqe_size;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        /*
         * TODO: If future implementations require more than 1 cq_tinfo per HCA, then
         *   search HCA cq_tinfo list for optimal cq_tinfo
         */
        if( hca->cq_tinfo == NULL )
        {
                cq_tinfo = ib_alloc_cq_tinfo( hca );
                if( cq_tinfo == NULL )
                {
                        CL_ERROR( IBSP_DBG_CM, gdbg_lvl, ("ib_alloc_cq_tinfo() failed\n") );
                        return (NULL);
                }
        }
        else
        {
                cq_tinfo = hca->cq_tinfo;
        }

        CL_ASSERT( cq_tinfo != NULL );

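        /* current_cqe_size is the CQ capacity needed by the QPs already
         * attached (each QP contributes up to IB_CQ_SIZE entries); grow
         * the CQ once it no longer covers them. */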
        current_cqe_size = cq_tinfo->qp_count * IB_CQ_SIZE;

        cl_atomic_inc( &cq_tinfo->qp_count );

        if( cq_tinfo->cqe_size < current_cqe_size )
        {
                ib_api_status_t status;
                status = ib_modify_cq( cq_tinfo->cq, &current_cqe_size );
                if( status )
                {
                        /*
                         * TODO: This could mean we are out of cqe and need to have
                         * more than one cq per HCA in the future.
                         */
                        cl_atomic_dec( &cq_tinfo->qp_count );
                        CL_EXIT_ERROR( IBSP_DBG_EP, gdbg_lvl,
                                ("ib_modify_cq() failed. (%d)\n", status) );
                        return (NULL);
                }
                else
                {
                        cq_tinfo->cqe_size = current_cqe_size;
                        fzprint(("%s():%d:0x%x:0x%x: New cq size=%d.\n",
                                         __FUNCTION__,
                                         __LINE__, GetCurrentProcessId(),
                                         GetCurrentThreadId(), cq_tinfo->cqe_size));
                }
        }

        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );

        return (cq_tinfo);
}

void
ib_release_cq_tinfo(
                                        struct cq_thread_info           *cq_tinfo )
{
        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        cl_atomic_dec( &cq_tinfo->qp_count );

        /* TODO: downsize the cq */

        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );
}


/* Release IB resources. */
void
ib_release(void)
{
        cl_fmap_item_t                  *p_item;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        if( g_ibsp.al_handle )
        {
                cl_list_item_t *item;
                ib_api_status_t status;

                unregister_pnp();

                if( g_ibsp.ib_cleanup_thread )
                {
                        /* Let thread know it's okay to exit after resources are freed */
                        g_ibsp.ib_cleanup_thread_exit_wanted = TRUE;
                        SetEvent( g_ibsp.ib_cleanup_event );

                        fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cleanup_thread to die.\n",
                                         __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                         GetCurrentThreadId()));

                        /* Wait for ib_cleanup_thread to die */
                        if( WaitForSingleObject( g_ibsp.ib_cleanup_thread, INFINITE ) != WAIT_OBJECT_0 )
                        {
                                CL_ERROR( IBSP_DBG_CM, gdbg_lvl, ("WaitForSingleObject failed\n") );
                        }
                        else
                        {
                                STAT_DEC( thread_num );
                        }

                        fzprint(("%s():%d:0x%x:0x%x: ib_cleanup_thread exited.\n", __FUNCTION__,
                                         __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
                        CloseHandle( g_ibsp.ib_cleanup_thread );
                        g_ibsp.ib_cleanup_thread = NULL;
                }

                if( g_ibsp.ib_cleanup_event )
                {
                        CloseHandle( g_ibsp.ib_cleanup_event );
                        g_ibsp.ib_cleanup_event = NULL;
                }

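                /* Tear down every HCA still on the list; pnp_ca_remove()
                 * unlinks the entry, so re-read the list head each pass. */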
                while( (item = cl_qlist_head( &g_ibsp.hca_list )) !=
                        cl_qlist_end( &g_ibsp.hca_list ) )
                {
                        struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);

                        if( hca->cq_tinfo )
                        {
                                CL_ASSERT( hca->cq_tinfo->qp_count == 0 );
                                ib_destroy_cq_tinfo( hca->cq_tinfo );
                        }

                        pnp_ca_remove( hca );
                }

                fzprint(("%s():%d:0x%x:0x%x: Calling ib_close_al...\n", __FUNCTION__,
                                 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

                status = ib_close_al( g_ibsp.al_handle );

                fzprint(("%s():%d:0x%x:0x%x: Done calling ib_close_al, status=%d.\n",
                                 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
                                 status));
                if( status != IB_SUCCESS )
                {
                        CL_ERROR( IBSP_DBG_HW, gdbg_lvl, ("ib_close_al failed (%d)\n", status) );
                }
                else
                {
                        CL_TRACE( IBSP_DBG_HW, gdbg_lvl, ("ib_close_al success\n") );
                        STAT_DEC( al_num );
                }
                g_ibsp.al_handle = NULL;
        }

        for( p_item = cl_fmap_head( &g_ibsp.ip_map );
                p_item != cl_fmap_end( &g_ibsp.ip_map );
                p_item = cl_fmap_head( &g_ibsp.ip_map ) )
        {
                cl_fmap_remove_item( &g_ibsp.ip_map, p_item );

                HeapFree( g_ibsp.heap, 0,
                        PARENT_STRUCT(p_item, struct ibsp_ip_addr, item) );
        }

        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );
}


/* Socket cleanup thread. */
static DWORD WINAPI
ib_cleanup_thread(
                                        LPVOID                                          lpParameter )
{
        cl_list_item_t *socket_item = NULL;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        UNUSED_PARAM( lpParameter );

        while( !g_ibsp.ib_cleanup_thread_exit_wanted ||
                cl_qlist_count( &g_ibsp.socket_info_list ) )
        {
                if( g_ibsp.ib_cleanup_thread_exit_wanted == FALSE )
                {
                        if( WaitForSingleObject( g_ibsp.ib_cleanup_event, INFINITE ) != WAIT_OBJECT_0 )
                        {
                                CL_ERROR( IBSP_DBG_CM, gdbg_lvl, ("WaitForSingleObject failed\n") );
                        }
                        ResetEvent( g_ibsp.ib_cleanup_event );
                }
                else
                {
                        fzprint(("%s():%d:0x%x:0x%x: socket_info_list cnt=%d\n", __FUNCTION__,
                                                __LINE__, GetCurrentProcessId(),
                                                GetCurrentThreadId(),
                                                cl_qlist_count( &g_ibsp.socket_info_list )));
                        Sleep( 1000 );
                }

                CL_TRACE( IBSP_DBG_WQ, gdbg_lvl, ("%s():%d:0x%x:0x%x: Wakeup\n",
                                                                                 __FUNCTION__,
                                                                                 __LINE__, GetCurrentProcessId(),
                                                                                 GetCurrentThreadId()));

                cl_spinlock_acquire( &g_ibsp.closed_socket_info_mutex );
                while( (socket_item = cl_qlist_remove_head( &g_ibsp.closed_socket_info_list )) !=
                        cl_qlist_end( &g_ibsp.closed_socket_info_list ) )
                {
                        struct ibsp_socket_info *socket_info = NULL;

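                        /* Drop the list lock while tearing this socket down;
                         * the teardown blocks (CQ drain, switch upcalls) and
                         * must not hold it.  Reacquired at the bottom of the
                         * loop. */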
                        cl_spinlock_release( &g_ibsp.closed_socket_info_mutex );

                        socket_info = PARENT_STRUCT(socket_item, struct ibsp_socket_info, item);

#ifdef _DEBUG_
                        {
                                uint8_t                 idx, i;
                                LPOVERLAPPED    lpOverlapped;

                                idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;
                                if( idx >= QP_ATTRIB_SQ_DEPTH )
                                        idx += QP_ATTRIB_SQ_DEPTH;

                                for( i = 0; i < socket_info->send_cnt; i++ )
                                {
                                        lpOverlapped = socket_info->send_wr[idx].lpOverlapped;
                                        fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p "
                                                "Internal=%d InternalHigh=%d hEvent=%d\n",
                                                __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                                GetCurrentThreadId(), socket_info, &socket_info->send_wr[idx],
                                                lpOverlapped, lpOverlapped->Internal,
                                                lpOverlapped->InternalHigh, lpOverlapped->hEvent));

                                        if( ++idx == QP_ATTRIB_SQ_DEPTH )
                                                idx = 0;
                                }

                                idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;
                                if( idx >= QP_ATTRIB_RQ_DEPTH )
                                        idx += QP_ATTRIB_RQ_DEPTH;

                                for( i = 0; i < socket_info->recv_cnt; i++ )
                                {
                                        lpOverlapped = socket_info->recv_wr[idx].wr.lpOverlapped;
                                        fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p "
                                                "Internal=%d InternalHigh=%d hEvent=%d\n",
                                                __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                                GetCurrentThreadId(), socket_info, &socket_info->recv_wr[idx],
                                                lpOverlapped, lpOverlapped->Internal,
                                                lpOverlapped->InternalHigh, lpOverlapped->hEvent));

                                        if( ++idx == QP_ATTRIB_RQ_DEPTH )
                                                idx = 0;
                                }
                        }
#endif
                        fzprint(("%s():%d:0x%x:0x%x: socket=0x%p\n",
                                         __FUNCTION__,
                                         __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), socket_info));

                        wait_cq_drain( socket_info );

                        if( socket_info->dup_cnt )
                                ibsp_dup_overlap_abort( socket_info );

                        /* Destroy the switch socket. */
                        if( socket_info->switch_socket != INVALID_SOCKET )
                        {
                                int ret;
                                int error;

                                fzprint(("%s():%d:0x%x:0x%x: socket=0x%p calling "
                                        "lpWPUCloseSocketHandle=0x%p\n", __FUNCTION__, __LINE__,
                                        GetCurrentProcessId(), GetCurrentThreadId(), socket_info,
                                        socket_info->switch_socket));

                                ret = g_ibsp.up_call_table.lpWPUCloseSocketHandle(
                                        socket_info->switch_socket, &error );
                                if( ret == SOCKET_ERROR )
                                {
                                        CL_ERROR( IBSP_DBG_EP, gdbg_lvl,
                                                ("WPUCloseSocketHandle failed: %d\n", error) );
                                }
                                else
                                {
                                        STAT_DEC( wpusocket_num );
                                }

                                socket_info->switch_socket = INVALID_SOCKET;
                        }

                        ib_destroy_socket( socket_info );

                        ib_deregister_all_mr( &socket_info->buf_mem_list );
                        free_socket_info( socket_info );
                        cl_spinlock_acquire( &g_ibsp.closed_socket_info_mutex );
                }
                cl_spinlock_release( &g_ibsp.closed_socket_info_mutex );
        }

        /* No special exit code, even on errors. */
        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );
        ExitThread( 0 );
}


/* Initialize IB resources. */
int
ibsp_initialize(void)
{
        ib_api_status_t status;
        int ret;

        CL_ENTER( IBSP_DBG_HW, gdbg_lvl );

        CL_ASSERT( g_ibsp.al_handle == NULL );
        CL_ASSERT( cl_qlist_count( &g_ibsp.hca_list ) == 0 );

        /* Open the IB library */
        status = ib_open_al( &g_ibsp.al_handle );

        CL_TRACE( IBSP_DBG_HW, gdbg_lvl, ("open is %d %p\n", status, g_ibsp.al_handle) );

        if( status != IB_SUCCESS )
        {
                CL_ERROR( IBSP_DBG_HW, gdbg_lvl, ("ib_open_al failed (%d)\n", status) );
                ret = WSAEPROVIDERFAILEDINIT;
                goto done;
        }

        STAT_INC( al_num );

        /* Register for PNP events */
        status = register_pnp();
        if( status )
        {
                CL_ERROR( IBSP_DBG_HW, gdbg_lvl, ("register_pnp failed (%d)\n", status) );
                ret = WSAEPROVIDERFAILEDINIT;
                goto done;
        }

        /* Populate IP list. */
        update_all_ip_addrs();

        /* Create a cleanup event */
        g_ibsp.ib_cleanup_event = CreateEvent( NULL, TRUE, FALSE, NULL );
        if( g_ibsp.ib_cleanup_event == NULL )
        {
                CL_ERROR( IBSP_DBG_HW, gdbg_lvl, ("CreateEvent failed."));
                ret = WSAEPROVIDERFAILEDINIT;
                goto done;
        }

        /* Create a cleanup thread */
        g_ibsp.ib_cleanup_thread = CreateThread( NULL, 0, ib_cleanup_thread, NULL, 0, NULL );

        if( g_ibsp.ib_cleanup_thread == NULL )
        {
                CL_ERROR( IBSP_DBG_HW, gdbg_lvl, ("CreateThread failed.") );
                ret = WSAEPROVIDERFAILEDINIT;
                goto done;
        }

        STAT_INC( thread_num );

        ret = 0;
done:
        if( ret )
        {
                /* Free up resources. */
                ib_release();
        }

        CL_EXIT( IBSP_DBG_HW, gdbg_lvl );

        return ret;
}


/* Destroys the InfiniBand resources of a socket. */
void
ib_destroy_socket(
        IN      OUT                     struct ibsp_socket_info         *socket_info )
{
        ib_api_status_t status;

        CL_ENTER( IBSP_DBG_EP, gdbg_lvl );

        if( socket_info->qp )
        {
                status = ib_destroy_qp( socket_info->qp, ib_sync_destroy );
                if( status )
                {
                        CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("ib_destroy_qp failed (%d)\n", status) );
                }
                else
                {
                        CL_TRACE( IBSP_DBG_WQ, gdbg_lvl,
                                ("%s():%d:0x%x:0x%x: ib_destroy_qp() finished\n", __FUNCTION__,
                                __LINE__, GetCurrentProcessId(), GetCurrentThreadId()) );

                        socket_info->qp = NULL;

                        STAT_DEC( qp_num );

                        ib_release_cq_tinfo( socket_info->cq_tinfo );
                }
        }

        CL_EXIT( IBSP_DBG_EP, gdbg_lvl );
}


/*
 * Creates the necessary IB resources for a socket.
 */
int
ib_create_socket(
        IN      OUT                     struct ibsp_socket_info         *socket_info)
{
        struct cq_thread_info   *cq_tinfo;
        ib_qp_create_t                  qp_create;
        ib_api_status_t                 status;
        int                                             ret;
        struct ibsp_hca                 *hca;
        ib_qp_attr_t                    qp_attr;

        CL_ENTER( IBSP_DBG_EP, gdbg_lvl );

        CL_ASSERT( socket_info != NULL );
        CL_ASSERT( socket_info->port != NULL );
        CL_ASSERT( socket_info->qp == NULL );

        hca = socket_info->port->hca;
        socket_info->hca_pd = hca->pd;

        /* Get the completion queue and thread info for this socket */
        cq_tinfo = ib_acquire_cq_tinfo( hca );
        if( cq_tinfo == NULL )
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("ib_acquire_cq_tinfo failed\n") );
                ret = WSAEPROVIDERFAILEDINIT;
                goto done;
        }
        socket_info->cq_tinfo = cq_tinfo;

        /* Queue pair */
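        /* Send and receive completions both go to the shared per-HCA CQ;
         * sq_signaled is TRUE so every send generates a completion that
         * complete_wq() can match back to its overlapped request. */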
        qp_create.qp_type = IB_QPT_RELIABLE_CONN;
        qp_create.sq_depth = QP_ATTRIB_SQ_DEPTH;
        qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;
        qp_create.sq_sge = QP_ATTRIB_SQ_SGE;
        qp_create.rq_sge = 1;
        qp_create.h_rq_cq = cq_tinfo->cq;
        qp_create.h_sq_cq = cq_tinfo->cq;
        qp_create.sq_signaled = TRUE;

        status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,    /* context */
                NULL,   /* async handler */
                &socket_info->qp );
        if( status )
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl, ("ib_create_qp failed (%d)\n", status));
                ret = WSAEPROVIDERFAILEDINIT;
                goto done;
        }

        status = ib_query_qp( socket_info->qp, &qp_attr );
        if( status == IB_SUCCESS )
        {
                socket_info->max_inline = min( g_max_inline, qp_attr.sq_max_inline );
        }
        else
        {
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl,
                        ("ib_query_qp returned %s\n", ib_get_err_str( status )) );
                socket_info->max_inline = 0;
        }

        STAT_INC( qp_num );

        ret = 0;

done:
        if( ret )
        {
                ib_destroy_socket( socket_info );
        }

        CL_EXIT( IBSP_DBG_EP, gdbg_lvl );

        return ret;
}


void
wait_cq_drain(
        IN      OUT                     struct ibsp_socket_info         *socket_info )
{
        CL_ENTER( IBSP_DBG_EP, gdbg_lvl );

        if( socket_info->cq_tinfo == NULL )
        {
                CL_EXIT( IBSP_DBG_EP, gdbg_lvl );
                return;
        }

        /* Wait for the QP to be drained. */
        while( socket_info->send_cnt || socket_info->recv_cnt )
        {
                fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr_list_count=%d qp state=%d\n",
                                 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
                                 socket_info, cl_qlist_count(&socket_info->wr_list)));

                Sleep(100);
        }

        CL_EXIT( IBSP_DBG_EP, gdbg_lvl );
}


void
ibsp_dup_overlap_abort(
        IN      OUT                     struct ibsp_socket_info         *socket_info )
{
        LPWSAOVERLAPPED lpOverlapped = NULL;
        int error;
        int ret;
        uint8_t                         idx;

        CL_ENTER( IBSP_DBG_EP, gdbg_lvl );
        CL_ASSERT( !socket_info->send_cnt && !socket_info->recv_cnt );

        /* Browse the list of all posted overlapped structures
         * to mark them as aborted. */
        idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;
        if( idx >= QP_ATTRIB_RQ_DEPTH )
                idx += QP_ATTRIB_RQ_DEPTH;
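        /* idx now points at the oldest outstanding duplicate WR: the uint8_t
         * subtraction can wrap below zero, and re-adding the ring depth
         * brings the index back into range. */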

        while( socket_info->dup_cnt )
        {
                lpOverlapped = socket_info->dup_wr[idx].wr.lpOverlapped;

                fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p "
                        "Internal=%d InternalHigh=%d hEvent=%d\n",
                        __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
                        socket_info, &socket_info->dup_wr[idx], lpOverlapped,
                        lpOverlapped->Internal, lpOverlapped->InternalHigh,
                        lpOverlapped->hEvent));

                lpOverlapped->OffsetHigh = WSAECONNABORTED;
                lpOverlapped->InternalHigh = 0;

                if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
                {
                        /* Indicate this operation is complete. The switch will poll
                         * with calls to WSPGetOverlappedResult(). */
#ifdef _DEBUG_
                        cl_atomic_dec(&g_ibsp.overlap_h1_comp_count);

                        fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
                                         __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                         GetCurrentThreadId(), lpOverlapped,
                                         g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
                                         g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

                        CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
                                         ("%s: set internal overlapped=0x%p Internal=%d OffsetHigh=%d\n",
                                          __FUNCTION__, lpOverlapped, lpOverlapped->Internal,
                                          lpOverlapped->OffsetHigh));

                        lpOverlapped->Internal = 0;
                }
                else
                {
#ifdef _DEBUG_
                        cl_atomic_dec(&g_ibsp.overlap_h0_count);

                        fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
                                         __FUNCTION__, __LINE__, GetCurrentProcessId(),
                                         GetCurrentThreadId(), lpOverlapped,
                                         g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
                                         g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif
                        CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
                                         ("%s: calls lpWPUCompleteOverlappedRequest, overlapped=0x%p "
                                          "OffsetHigh=%d InternalHigh=%d hEvent=%d\n",
                                          __FUNCTION__, lpOverlapped, lpOverlapped->OffsetHigh,
                                          lpOverlapped->InternalHigh, lpOverlapped->hEvent));

                        ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest
                                (socket_info->switch_socket,
                                 lpOverlapped,
                                 lpOverlapped->OffsetHigh, (DWORD) lpOverlapped->InternalHigh, &error);

                        if( ret != 0 )
                        {
                                CL_ERROR(IBSP_DBG_EP, gdbg_lvl,
                                                 ("lpWPUCompleteOverlappedRequest failed with %d/%d\n", ret,
                                                  error));
                        }
                }
                cl_atomic_dec( &socket_info->dup_cnt );

                /* Advance to the next ring slot so each duplicate WR is
                 * aborted once. */
                if( ++idx == QP_ATTRIB_RQ_DEPTH )
                        idx = 0;
        }

        CL_EXIT( IBSP_DBG_EP, gdbg_lvl );
}


/* Closes a connection and releases its resources. */
void
shutdown_and_destroy_socket_info(
        IN      OUT                     struct ibsp_socket_info         *socket_info,
        IN                              int                                                     old_state )
{
        CL_ENTER( IBSP_DBG_EP, gdbg_lvl );

        if( socket_info->duplicate.mmap_handle )
        {
                CloseHandle( socket_info->duplicate.mmap_handle );
                socket_info->duplicate.mmap_handle = NULL;
        }

        if( socket_info->info.listen.handle )
        {
                /* Stop listening and reject queued connections. */
                ib_listen_cancel( socket_info );
        }

        switch( old_state )
        {
        case IBSP_CLOSING:
                /* This function has already been called. Should not happen. */
                CL_ERROR( IBSP_DBG_EP, gdbg_lvl,
                                 ("shutdown_and_destroy_socket_info already in closing socket_state\n") );
                return;

        case IBSP_CREATE:
                /* Nothing to do. */
                break;

        case IBSP_CONNECT:
                // TODO
                break;

        case IBSP_ACCEPT:
                // TODO
                break;

        case IBSP_LISTEN:
                break;

        case IBSP_CONNECTED:
                {
                        struct disconnect_reason reason;
                        memset( &reason, 0, sizeof(reason) );
                        reason.type = DISC_SHUTDOWN;
                        ib_disconnect( socket_info, &reason );
                }
                break;

        case IBSP_DISCONNECTED:
                /* Nothing to do. */
                break;
        }

        CL_EXIT( IBSP_DBG_EP, gdbg_lvl );
}


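/* Insert a socket into the global connection map, keyed on the local and
 * peer address 4-tuple.  Returns TRUE on success, FALSE if an identical
 * connection already exists. */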
boolean_t
ibsp_conn_insert(
        IN                              struct ibsp_socket_info         *s )
{
        struct ibsp_socket_info         *p_sock;
        cl_rbmap_item_t                         *p_item, *p_insert_at;
        boolean_t                                       left = TRUE;

        p_item = cl_rbmap_root( &g_ibsp.conn_map );
        p_insert_at = p_item;

        cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
        CL_ASSERT( !s->conn_item.p_map );
        while( p_item != cl_rbmap_end( &g_ibsp.conn_map ) )
        {
                p_insert_at = p_item;
                p_sock = PARENT_STRUCT( p_item, struct ibsp_socket_info, conn_item );
                if( p_sock->local_addr.sin_family < s->local_addr.sin_family )
                        p_item = cl_rbmap_left( p_item ), left = TRUE;
                else if( p_sock->local_addr.sin_family > s->local_addr.sin_family )
                        p_item = cl_rbmap_right( p_item ), left = FALSE;
                else if( p_sock->local_addr.sin_addr.S_un.S_addr < s->local_addr.sin_addr.S_un.S_addr )
                        p_item = cl_rbmap_left( p_item ), left = TRUE;
                else if( p_sock->local_addr.sin_addr.S_un.S_addr > s->local_addr.sin_addr.S_un.S_addr )
                        p_item = cl_rbmap_right( p_item ), left = FALSE;
                else if( p_sock->local_addr.sin_port < s->local_addr.sin_port )
                        p_item = cl_rbmap_left( p_item ), left = TRUE;
                else if( p_sock->local_addr.sin_port > s->local_addr.sin_port )
                        p_item = cl_rbmap_right( p_item ), left = FALSE;
                else if( p_sock->peer_addr.sin_family < s->peer_addr.sin_family )
                        p_item = cl_rbmap_left( p_item ), left = TRUE;
                else if( p_sock->peer_addr.sin_family > s->peer_addr.sin_family )
                        p_item = cl_rbmap_right( p_item ), left = FALSE;
                else if( p_sock->peer_addr.sin_addr.S_un.S_addr < s->peer_addr.sin_addr.S_un.S_addr )
                        p_item = cl_rbmap_left( p_item ), left = TRUE;
                else if( p_sock->peer_addr.sin_addr.S_un.S_addr > s->peer_addr.sin_addr.S_un.S_addr )
                        p_item = cl_rbmap_right( p_item ), left = FALSE;
                else if( p_sock->peer_addr.sin_port < s->peer_addr.sin_port )
                        p_item = cl_rbmap_left( p_item ), left = TRUE;
                else if( p_sock->peer_addr.sin_port > s->peer_addr.sin_port )
                        p_item = cl_rbmap_right( p_item ), left = FALSE;
                else
                        goto done;
        }

        cl_rbmap_insert( &g_ibsp.conn_map, p_insert_at, &s->conn_item, left );

done:
        cl_spinlock_release( &g_ibsp.socket_info_mutex );
        return p_item == cl_rbmap_end( &g_ibsp.conn_map );
}


void
ibsp_conn_remove(
        IN                              struct ibsp_socket_info         *s )
{
        cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
        CL_ASSERT( s->conn_item.p_map );
        cl_rbmap_remove_item( &g_ibsp.conn_map, &s->conn_item );
        cl_spinlock_release( &g_ibsp.socket_info_mutex );
}