[WSD] Rename some structures to make the code more readable.
/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "ibspdebug.h"

#if defined(EVENT_TRACING)
#ifdef offsetof
#undef offsetof
#endif
#include "ibsp_iblow.tmh"
#endif

#include "ibspdll.h"

#ifdef PERFMON_ENABLED
#include "ibsp_perfmon.h"
#endif


typedef struct _io_comp_info
{
	struct ibsp_socket_info	*p_socket;
	LPWSAOVERLAPPED			p_ov;

} io_comp_info_t;
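
/*
 * complete_wq() fills in one io_comp_info_t per reaped work completion;
 * ib_cq_comp() then makes the lpWPUCompleteOverlappedRequest() up-call
 * for each entry after the polling pass, outside complete_wq() itself.
 */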


/* Work queue entry completion routine. */
static void
complete_wq(
	IN		const	ib_wc_t						*wc,
		OUT			io_comp_info_t				*p_io_info )
{
	struct _wr				*wr = NULL;
	struct _recv_wr			*p_recv_wr = NULL;
	LPWSAOVERLAPPED			lpOverlapped = NULL;
	struct ibsp_socket_info	*socket_info = NULL;

	IBSP_ENTER( IBSP_DBG_IO );

	wr = (struct _wr * __ptr64)wc->wr_id;
	p_recv_wr = (struct _recv_wr * __ptr64)wc->wr_id;

	CL_ASSERT( wr );

	socket_info = wr->socket_info;
	p_io_info->p_socket = socket_info;

	lpOverlapped = wr->lpOverlapped;

	IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_IO,
		("socket %p, ov %p, work completion status=%s, wc_type=%s\n",
		socket_info, lpOverlapped, ib_get_wc_status_str( wc->status ),
		ib_get_wc_type_str( wc->wc_type )) );

	/* Set the Windows error code.  There is no clean mapping from the
	 * IBAL status codes to Windows error codes, but it probably does
	 * not matter, as long as an error is returned. */
	switch( wc->status )
	{
	case IB_WCS_SUCCESS:
		/*
		 * Set the length of the operation.  Under InfiniBand, the work
		 * completion length is only valid for a receive operation.
		 * Fortunately we had already set the length during the send
		 * operation.
		 *
		 * lpWPUCompleteOverlappedRequest is supposed to store the length
		 * into InternalHigh, however it will not be called if the low
		 * order bit of lpOverlapped->hEvent is set.  So we do it and hope
		 * for the best.
		 *
		 * NOTE: Without a valid length, the switch doesn't seem to call
		 * GetOverlappedResult() even if we call lpWPUCompleteOverlappedRequest().
		 */
		switch( wc->wc_type )
		{
		case IB_WC_RECV:
			CL_ASSERT(wc->length != 0);
			lpOverlapped->InternalHigh = wc->length;
#ifdef IBSP_LOGGING
			cl_spinlock_acquire( &socket_info->recv_lock );
			DataLogger_WriteData(&socket_info->RecvDataLogger,
				p_recv_wr->idx, (void * __ptr64)p_recv_wr->ds_array[0].vaddr,
				wc->length);
			cl_spinlock_release( &socket_info->recv_lock );
#endif
#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_RECV] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_RECV],
				lpOverlapped->InternalHigh );
#endif
#ifdef _DEBUG_
			cl_atomic_inc(&g_ibsp.total_recv_compleated);
#endif
			break;

		case IB_WC_RDMA_READ:
			CL_ASSERT(wc->length != 0);
			lpOverlapped->InternalHigh = wc->length;
#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_RECV] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_READ],
				lpOverlapped->InternalHigh );
#endif /* PERFMON_ENABLED */
			break;

#ifdef PERFMON_ENABLED
		case IB_WC_SEND:
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_SEND] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_SEND],
				lpOverlapped->InternalHigh );
			break;

		case IB_WC_RDMA_WRITE:
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_SEND] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_WRITE],
				lpOverlapped->InternalHigh );
			break;
#endif /* PERFMON_ENABLED */
		default:
			break;
		}


		lpOverlapped->OffsetHigh = 0;
		break;

	case IB_WCS_WR_FLUSHED_ERR:
		cl_spinlock_acquire( &socket_info->mutex1 );

		if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE &&
			wc->wc_type == IB_WC_RECV )
		{
			/*
			 * Take the wr off the wr_list, and place onto the
			 * dup_wr_list.  We will post them later on the new QP.
			 */
			cl_spinlock_acquire( &socket_info->recv_lock );

			/* Copy to the duplicate WR array. */
			socket_info->dup_wr[socket_info->dup_idx] = *p_recv_wr;

#if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \
	QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \
	QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8
			socket_info->dup_idx++;
			socket_info->dup_idx &= (QP_ATTRIB_RQ_DEPTH - 1);
#else
			if( ++socket_info->dup_idx == QP_ATTRIB_RQ_DEPTH )
				socket_info->dup_idx = 0;
#endif
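			/* When QP_ATTRIB_RQ_DEPTH is a power of two, the wrap-around
			 * above reduces to a bit-mask (idx & (depth - 1)), which is
			 * cheaper than the compare-and-reset fallback. */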

			cl_atomic_inc( &socket_info->dup_cnt );
			/* ib_cq_comp will decrement the receive count. */
			cl_atomic_dec( &socket_info->recv_cnt );

			cl_spinlock_release( &socket_info->recv_lock );

			cl_spinlock_release( &socket_info->mutex1 );
			p_io_info->p_ov = NULL;
			IBSP_EXIT( IBSP_DBG_IO );
			return;
		}

		/* Check for flushing the receive buffers on purpose. */
		if( socket_info->socket_state == IBSP_DUPLICATING_OLD )
			wr->lpOverlapped->OffsetHigh = 0;
		else
			wr->lpOverlapped->OffsetHigh = WSA_OPERATION_ABORTED;

		cl_spinlock_release( &socket_info->mutex1 );

		/* Override the length, as per the WSD specs. */
		wr->lpOverlapped->InternalHigh = 0;
		break;

	case IB_WCS_LOCAL_LEN_ERR:
	case IB_WCS_LOCAL_OP_ERR:
	case IB_WCS_LOCAL_PROTECTION_ERR:
	case IB_WCS_MEM_WINDOW_BIND_ERR:
	case IB_WCS_REM_ACCESS_ERR:
	case IB_WCS_REM_OP_ERR:
	case IB_WCS_RNR_RETRY_ERR:
	case IB_WCS_TIMEOUT_RETRY_ERR:
	case IB_WCS_REM_INVALID_REQ_ERR:
	default:
		{
			char	comp_name[MAX_COMPUTERNAME_LENGTH + 1] = {0};
			DWORD	len = sizeof(comp_name);
			GetComputerName( comp_name, &len );
			IBSP_ERROR( ("%s (%s:%d to ",
				comp_name, inet_ntoa( socket_info->local_addr.sin_addr ),
				socket_info->local_addr.sin_port) );
			IBSP_ERROR( ("%s:%d) %s error: %s (vendor specific %I64x)\n",
				inet_ntoa( socket_info->peer_addr.sin_addr ),
				socket_info->peer_addr.sin_port,
				ib_get_wc_type_str( wc->wc_type ),
				ib_get_wc_status_str( wc->status ),
				wc->vendor_specific) );
			lpOverlapped->OffsetHigh = WSAECONNABORTED;
			wr->lpOverlapped->InternalHigh = 0;
			socket_info->qp_error = WSAECONNABORTED;
			break;
		}
	}

#ifdef PERFMON_ENABLED
	InterlockedIncrement64( &g_pm_stat.pdata[COMP_TOTAL] );
#endif

#ifdef _DEBUG_
	if( wc->wc_type == IB_WC_RECV )
	{
		// This code requires the recv count to be decremented here, but it needs
		// to be decremented after any callbacks are invoked so socket destruction
		// gets delayed until all callbacks have been invoked.
		//{
		//	uint8_t idx;

		//	cl_spinlock_acquire( &socket_info->recv_lock );
		//	idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;
		//	if( idx >= QP_ATTRIB_RQ_DEPTH )
		//		idx += QP_ATTRIB_RQ_DEPTH;

		//	CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->recv_wr[idx] );
		//	cl_atomic_dec( &socket_info->recv_cnt );
		//	cl_spinlock_release( &socket_info->recv_lock );
		//}

		if( wc->status == IB_SUCCESS && p_recv_wr->ds_array[0].length >= 40 )
		{
			debug_dump_buffer( IBSP_DBG_WQ, "RECV",
				(void * __ptr64)p_recv_wr->ds_array[0].vaddr, 40 );
		}

		cl_atomic_dec( &g_ibsp.recv_count );
		cl_atomic_inc( &socket_info->recv_comp );

		memset( p_recv_wr, 0x33, sizeof(struct _recv_wr) );
	}
	else
	{
		// This code requires the send count to be decremented here, but it needs
		// to be decremented after any callbacks are invoked so socket destruction
		// gets delayed until all callbacks have been invoked.
		//{
		//	uint8_t idx;

		//	cl_spinlock_acquire( &socket_info->send_lock );
		//	idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;
		//	if( idx >= QP_ATTRIB_SQ_DEPTH )
		//		idx += QP_ATTRIB_SQ_DEPTH;
		//	CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->send_wr[idx] );
		//	cl_atomic_dec( &socket_info->send_cnt );
		//	cl_spinlock_release( &socket_info->send_lock );
		//}

		if( wc->wc_type == IB_WC_SEND )
		{
			cl_atomic_dec( &g_ibsp.send_count );
			cl_atomic_inc( &socket_info->send_comp );

			fzprint(("%s():%d:0x%x:0x%x: send_count=%d\n",
				__FUNCTION__,
				__LINE__, GetCurrentProcessId(), GetCurrentThreadId(), g_ibsp.send_count));
		}

		memset( wr, 0x33, sizeof(struct _wr) );
	}
#endif
	IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_IO,
		("overlapped=%p, InternalHigh=%Id, hEvent=%p\n",
		lpOverlapped, lpOverlapped->InternalHigh,
		lpOverlapped->hEvent) );

	/* Notify the switch of this completion only if it asked for a
	 * notification.  The switch sets the low-order bit of
	 * lpOverlapped->hEvent when it does NOT want one; in that case it
	 * will poll with calls to WSPGetOverlappedResult() instead.
	 */
	if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
	{
		/* Indicate this operation is complete. The switch will poll
		 * with calls to WSPGetOverlappedResult(). */

#ifdef _DEBUG_
		cl_atomic_dec( &g_ibsp.overlap_h1_comp_count );

		fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(),
				 GetCurrentThreadId(), lpOverlapped,
				 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
				 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

		IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_IO,
			("Not calling lpWPUCompleteOverlappedRequest: "
			"socket=%p, ov=%p OffsetHigh=%d, InternalHigh=%Id hEvent=%p\n",
			socket_info, lpOverlapped, lpOverlapped->OffsetHigh,
			lpOverlapped->InternalHigh, lpOverlapped->hEvent) );

		lpOverlapped->Internal = 0;
		p_io_info->p_ov = NULL;
	}
	else
	{
#ifdef _DEBUG_
		cl_atomic_dec( &g_ibsp.overlap_h0_count );

		fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(),
				 GetCurrentThreadId(), lpOverlapped,
				 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
				 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

		p_io_info->p_ov = lpOverlapped;
		cl_atomic_inc( &socket_info->ref_cnt1 );
	}

	if( wc->wc_type == IB_WC_RECV )
	{
		cl_atomic_dec( &socket_info->recv_cnt );
	}
	else
	{
		cl_atomic_dec( &socket_info->send_cnt );
	}

	IBSP_EXIT( IBSP_DBG_IO );
}
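
/*
 * On return, p_io_info->p_socket is always set; p_io_info->p_ov is the
 * overlapped request to complete via the switch up-call, or NULL when
 * no up-call is needed (polled completions and duplicated receives).
 */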


/* CQ completion handler. */
int
ib_cq_comp(
					void						*cq_context )
{
	struct cq_thread_info	*cq_tinfo = cq_context;
	ib_api_status_t			status;
	ib_wc_t					wclist[WC_LIST_SIZE];
	ib_wc_t					*free_wclist;
	ib_wc_t					*done_wclist;
	io_comp_info_t			info[WC_LIST_SIZE];
	int						cb_idx;
	int						i;
	int						n_comp = 0;
#ifdef _DEBUG_
	int						comp_count;
#endif

	IBSP_ENTER( IBSP_DBG_WQ );

	CL_ASSERT( WC_LIST_SIZE >= 1 );

	do
	{
		/* Try to retrieve up to WC_LIST_SIZE completions at a time. */
		for( i = 0; i < (WC_LIST_SIZE - 1); i++ )
		{
			wclist[i].p_next = &wclist[i + 1];
		}
		wclist[(WC_LIST_SIZE - 1)].p_next = NULL;

		free_wclist = &wclist[0];
		done_wclist = NULL;

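		/* ib_poll_cq() consumes entries from the free list and returns
		 * the reaped completions chained on done_wclist; if it exhausts
		 * the free list, more completions may still be queued, so the
		 * outer loop polls again. */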
		status = ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist );

		IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_WQ,
			("poll CQ got status %d, free=%p, done=%p\n",
			status, free_wclist, done_wclist) );

		switch( status )
		{
		case IB_NOT_FOUND:
		case IB_SUCCESS:
			break;

		case IB_INVALID_CQ_HANDLE:
			/* This happens when the switch closes the socket while the
			 * execution thread was calling lpWPUCompleteOverlappedRequest. */
			IBSP_ERROR( (
				"ib_poll_cq returned IB_INVALID_CQ_HANDLE\n") );
			goto done;

		default:
			IBSP_ERROR( (
				"ib_poll_cq failed, returned %s\n", ib_get_err_str( status )) );
			break;
		}

#ifdef _DEBUG_
		comp_count = 0;
#endif

		/* We have some completions. */
		cb_idx = 0;
		while( done_wclist )
		{
#ifdef _DEBUG_
			comp_count++;
#endif
			complete_wq( done_wclist, &info[cb_idx++] );

			done_wclist = done_wclist->p_next;
		}

		for( i = 0; i < cb_idx; i++ )
		{
			int error;
			int ret;

			if( info[i].p_ov )
			{
				IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_IO,
					("Calling lpWPUCompleteOverlappedRequest: "
					"socket=%p, ov=%p OffsetHigh=%d "
					"InternalHigh=%Id hEvent=%p\n",
					info[i].p_socket, info[i].p_ov, info[i].p_ov->OffsetHigh,
					info[i].p_ov->InternalHigh, info[i].p_ov->hEvent) );

				ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
					info[i].p_socket->switch_socket, info[i].p_ov,
					info[i].p_ov->OffsetHigh,
					(DWORD)info[i].p_ov->InternalHigh, &error );
				if( ret != 0 )
				{
					IBSP_ERROR( ("WPUCompleteOverlappedRequest for ov=%p "
						"returned %d err %d\n", info[i].p_ov, ret, error) );
				}
				deref_socket_info( info[i].p_socket );
			}
		}

		n_comp += i;

#ifdef _DEBUG_
		if( comp_count > g_ibsp.max_comp_count )
		{
			g_ibsp.max_comp_count = comp_count;
		}
#endif
	} while( !free_wclist );

done:

#ifdef _DEBUG_
	fzprint(("%s():%d:0x%x:0x%x: overlap_h0_count=%d overlap_h1_count=%d\n",
			 __FUNCTION__,
			 __LINE__, GetCurrentProcessId(),
			 GetCurrentThreadId(), g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count));
#endif

	IBSP_EXIT( IBSP_DBG_WQ );
	return n_comp;
}
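
/*
 * Returns the number of completions processed; ib_cq_thread() uses a
 * non-zero return to reset its g_max_poll countdown and keep polling.
 */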


/* IB completion thread */
static DWORD WINAPI
ib_cq_thread(
					LPVOID						lpParameter )
{
	struct cq_thread_info	*cq_tinfo = (struct cq_thread_info *)lpParameter;
	cl_status_t				cl_status;
	ib_api_status_t			status;
	int						i;
	DWORD_PTR				old_affinity;

	IBSP_ENTER( IBSP_DBG_HW );

	fzprint(("%s():%d:0x%x:0x%x: cq_tinfo=0x%p\n", __FUNCTION__,
			 __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo));

	old_affinity = SetThreadAffinityMask( GetCurrentThread(), g_dwPollThreadAffinityMask );
	if( old_affinity == 0 )
	{
		IBSP_ERROR(("SetThreadAffinityMask failed\n"));
	}
	else
	{
		IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_DLL, ("SetThreadAffinityMask succeeded\n"));
	}


	do
	{
		cl_status = cl_waitobj_wait_on( cq_tinfo->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
		if( cl_status != CL_SUCCESS )
		{
			IBSP_ERROR( (
				"cl_waitobj_wait_on() (%d)\n", cl_status) );
		}

		/*
		 * TODO: By rearranging thread creation and cq creation, this check
		 * may be eliminated.
		 */
		if( cq_tinfo->cq != NULL )
		{
			fzprint(("%s():%d:0x%x:0x%x: Calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[INTR_TOTAL] );
#endif
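			/* Keep polling until the CQ stays empty for g_max_poll
			 * consecutive iterations; any pass that finds work resets
			 * the countdown, presumably trading some CPU for fewer
			 * wait-object round trips under load. */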
			i = g_max_poll;
			do
			{
				if( ib_cq_comp( cq_tinfo ) )
					i = g_max_poll;

			} while( i-- );

			fzprint(("%s():%d:0x%x:0x%x: Done calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

			status = ib_rearm_cq( cq_tinfo->cq, FALSE );
			if( status != IB_SUCCESS )
			{
				IBSP_ERROR( (
					"ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
			}
		}

	} while( !cq_tinfo->ib_cq_thread_exit_wanted );

	cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		IBSP_ERROR( (
			"cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status)) );
	}
	HeapFree( g_ibsp.heap, 0, cq_tinfo );

	/* No special exit code, even on errors. */
	IBSP_EXIT( IBSP_DBG_HW );
	ExitThread( 0 );
}


/* Called with the HCA's CQ lock held. */
static struct cq_thread_info *
ib_alloc_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info	*cq_tinfo = NULL;
	ib_cq_create_t			cq_create;
	ib_api_status_t			status;
	cl_status_t				cl_status;

	IBSP_ENTER( IBSP_DBG_HW );

	cq_tinfo = HeapAlloc(
		g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );

	if( !cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("HeapAlloc() Failed.\n") );
		return NULL;
	}

	cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		cq_tinfo->cq_waitobj = NULL;
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( (
			"cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status)) );
		return NULL;
	}

	cq_tinfo->hca = hca;
	cq_tinfo->ib_cq_thread_exit_wanted = FALSE;

	cq_tinfo->ib_cq_thread = CreateThread( NULL, 0, ib_cq_thread, cq_tinfo, 0,
		(LPDWORD)&cq_tinfo->ib_cq_thread_id );

	if( cq_tinfo->ib_cq_thread == NULL )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( ("CreateThread failed (%d)", GetLastError()) );
		return NULL;
	}

	STAT_INC( thread_num );

	/* Completion queue */
	cq_create.size = IB_INIT_CQ_SIZE;

	cq_create.pfn_comp_cb = NULL;
	cq_create.h_wait_obj = cq_tinfo->cq_waitobj;

	status = ib_create_cq( hca->hca_handle, &cq_create, cq_tinfo,
		NULL, &cq_tinfo->cq );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( (
			"ib_create_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	STAT_INC( cq_num );

	status = ib_rearm_cq( cq_tinfo->cq, FALSE );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( (
			"ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	cq_tinfo->cqe_size = cq_create.size;

	if( hca->cq_tinfo )
	{
		__cl_primitive_insert(
			&hca->cq_tinfo->list_item, &cq_tinfo->list_item );
	}
	else
	{
		/* Setup the list entry to point to itself. */
		cq_tinfo->list_item.p_next = &cq_tinfo->list_item;
		cq_tinfo->list_item.p_prev = &cq_tinfo->list_item;
	}

	/* We will be assigned to a QP - set the QP count. */
	cq_tinfo->qp_count = 1;

	/* Upon allocation, the new CQ becomes the primary. */
	hca->cq_tinfo = cq_tinfo;

	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}
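
/*
 * Each cq_thread_info bundles a CQ, its wait object, and a dedicated
 * polling thread.  All CQs of an HCA sit on a circular list whose entry
 * point, hca->cq_tinfo, is the "primary" CQ that new sockets try first.
 */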


void
ib_destroy_cq_tinfo(
					struct cq_thread_info		*cq_tinfo )
{
	ib_wc_t					wclist;
	ib_wc_t					*free_wclist;
	ib_wc_t					*done_wclist;
	ib_api_status_t			status;
	HANDLE					h_cq_thread;
	DWORD					cq_thread_id;

	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( cq_tinfo );
	CL_ASSERT( cq_tinfo->qp_count == 0 );

	if( cq_tinfo->cq )
	{
		wclist.p_next = NULL;
		free_wclist = &wclist;

		while( ib_poll_cq(
			cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )
		{
			IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_WQ,
				("free=%p, done=%p\n", free_wclist, done_wclist) );
		}

		IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_WQ, ("ib_destroy_cq() start..\n") );

		/*
		 * Called from cleanup thread, okay to block.
		 */
		status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );
		if( status )
		{
			IBSP_ERROR( (
				"ib_destroy_cq returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_WQ, ("ib_destroy_cq() finished.\n") );

			cq_tinfo->cq = NULL;

			STAT_DEC( cq_num );
		}
	}

	if( cq_tinfo->ib_cq_thread )
	{
		/* ib_cq_thread() will free the cq_tinfo before it exits, so don't
		 * reference cq_tinfo after signaling the wait object. */
		h_cq_thread = cq_tinfo->ib_cq_thread;
		cq_tinfo->ib_cq_thread = NULL;
		cq_thread_id = cq_tinfo->ib_cq_thread_id;

		cq_tinfo->ib_cq_thread_exit_wanted = TRUE;
		cl_waitobj_signal( cq_tinfo->cq_waitobj );

		/* Wait for ib_cq_thread to die, if we are not running on it. */
		if( GetCurrentThreadId() != cq_thread_id )
		{
			fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cq_thread=0x%x to die\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
					 cq_thread_id ));
			if( WaitForSingleObject( h_cq_thread, INFINITE ) != WAIT_OBJECT_0 )
			{
				IBSP_ERROR( ("WaitForSingleObject failed\n") );
			}
			else
			{
				STAT_DEC( thread_num );
			}
		}
		else
		{
			fzprint(("%s():%d:0x%x:0x%x: Currently on ib_cq_thread.\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
			STAT_DEC( thread_num );
		}
		CloseHandle( h_cq_thread );
	}
	else
	{
		/* There was no thread created; destroy cq_waitobj and
		 * free memory. */
		if( cq_tinfo->cq_waitobj )
		{
			cl_waitobj_destroy( cq_tinfo->cq_waitobj );
			cq_tinfo->cq_waitobj = NULL;
		}
		HeapFree( g_ibsp.heap, 0, cq_tinfo );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}


static struct cq_thread_info *
ib_acquire_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info	*cq_tinfo = NULL, *cq_end;
	uint32_t				cqe_size;
	ib_api_status_t			status;

	IBSP_ENTER( IBSP_DBG_HW );

	cl_spinlock_acquire( &hca->cq_lock );

	if( !hca->cq_tinfo )
	{
		cq_tinfo = ib_alloc_cq_tinfo( hca );
		if( !cq_tinfo )
			IBSP_ERROR( ("ib_alloc_cq_tinfo() failed\n") );
		cl_spinlock_release( &hca->cq_lock );
		IBSP_EXIT( IBSP_DBG_HW );
		return cq_tinfo;
	}

	cq_tinfo = hca->cq_tinfo;
	cq_end = cq_tinfo;
	cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;

	do
	{
		if( cq_tinfo->cqe_size >= cqe_size )
		{
			cq_tinfo->qp_count++;
			cl_spinlock_release( &hca->cq_lock );
			IBSP_EXIT( IBSP_DBG_HW );
			return (cq_tinfo);
		}

		status = ib_modify_cq( cq_tinfo->cq, &cqe_size );
		switch( status )
		{
		case IB_SUCCESS:
			/* The CQ was resized; take it. */
			cq_tinfo->cqe_size = cqe_size;
			cq_tinfo->qp_count++;
			cl_spinlock_release( &hca->cq_lock );
			IBSP_EXIT( IBSP_DBG_HW );
			return (cq_tinfo);

		default:
			IBSP_ERROR_EXIT( (
				"ib_modify_cq() returned %s\n", ib_get_err_str(status)) );
			/* Fall through. */
		case IB_INVALID_CQ_SIZE:
		case IB_UNSUPPORTED:
			cq_tinfo = PARENT_STRUCT(
				cl_qlist_next( &cq_tinfo->list_item ), struct cq_thread_info,
				list_item );
			cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;
		}

	} while( cq_tinfo != cq_end );

	if( cq_tinfo == cq_end )
		cq_tinfo = ib_alloc_cq_tinfo( hca );

	cl_spinlock_release( &hca->cq_lock );
	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}

void
ib_release_cq_tinfo(
					struct cq_thread_info		*cq_tinfo )
{
	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( cq_tinfo );
	CL_ASSERT( cq_tinfo->hca );

	cl_spinlock_acquire( &cq_tinfo->hca->cq_lock );
	/* If this CQ now has fewer QPs than the primary, make it the primary. */
	if( --cq_tinfo->qp_count < cq_tinfo->hca->cq_tinfo->qp_count )
		cq_tinfo->hca->cq_tinfo = cq_tinfo;
	cl_spinlock_release( &cq_tinfo->hca->cq_lock );

	IBSP_EXIT( IBSP_DBG_HW );
}


/* Release IB resources. */
void
ib_release(void)
{
	cl_fmap_item_t			*p_item;

	IBSP_ENTER( IBSP_DBG_HW );

	if( g_ibsp.al_handle )
	{
		cl_list_item_t *item;
		ib_api_status_t status;

		unregister_pnp();

		while( (item = cl_qlist_head( &g_ibsp.hca_list )) != cl_qlist_end( &g_ibsp.hca_list ) )
		{
			struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);

			pnp_ca_remove( hca );
		}

		fzprint(("%s():%d:0x%x:0x%x: Calling ib_close_al...\n", __FUNCTION__,
				 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

		status = ib_close_al( g_ibsp.al_handle );

		fzprint(("%s():%d:0x%x:0x%x: Done calling ib_close_al, status=%d.\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 status));
		if( status != IB_SUCCESS )
		{
			IBSP_ERROR( (
				"ib_close_al returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_HW, ("ib_close_al success\n") );
			STAT_DEC( al_num );
		}
		g_ibsp.al_handle = NULL;
	}

	for( p_item = cl_fmap_head( &g_ibsp.ip_map );
		p_item != cl_fmap_end( &g_ibsp.ip_map );
		p_item = cl_fmap_head( &g_ibsp.ip_map ) )
	{
		cl_fmap_remove_item( &g_ibsp.ip_map, p_item );

		HeapFree( g_ibsp.heap, 0,
			PARENT_STRUCT(p_item, struct ibsp_ip_addr, item) );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}


/* Initialize IB resources. */
int
ibsp_initialize(void)
{
	ib_api_status_t status;
	int ret;

	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( g_ibsp.al_handle == NULL );
	CL_ASSERT( cl_qlist_count( &g_ibsp.hca_list ) == 0 );

	/* Open the IB library */
	status = ib_open_al( &g_ibsp.al_handle );

	IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_HW, ("open is %d %p\n", status, g_ibsp.al_handle) );

	if( status != IB_SUCCESS )
	{
		IBSP_ERROR( ("ib_open_al failed (%d)\n", status) );
		ret = WSAEPROVIDERFAILEDINIT;
		goto done;
	}

	STAT_INC( al_num );

	/* Register for PNP events */
	status = register_pnp();
	if( status )
	{
		IBSP_ERROR( ("register_pnp failed (%d)\n", status) );
		ret = WSAEPROVIDERFAILEDINIT;
		goto done;
	}

	STAT_INC( thread_num );

	ret = 0;
done:
	if( ret )
	{
		/* Free up resources. */
		ib_release();
	}

	IBSP_EXIT( IBSP_DBG_HW );

	return ret;
}


/* Destroys the InfiniBand resources of a socket. */
void
ib_destroy_socket(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	ib_api_status_t status;

	IBSP_ENTER( IBSP_DBG_EP );

	if( socket_info->qp )
	{
		cl_atomic_inc( &socket_info->ref_cnt1 );
		status = ib_destroy_qp( socket_info->qp, deref_socket_info );
		if( status != IB_SUCCESS )
		{
			IBSP_ERROR( ("ib_destroy_qp returned %s\n",
				ib_get_err_str( status )) );
			deref_socket_info( socket_info );
		}

		ib_release_cq_tinfo( socket_info->cq_tinfo );

		socket_info->qp = NULL;
	}

	IBSP_EXIT( IBSP_DBG_EP );
}


/*
 * Creates the necessary IB resources for a socket.
 */
int
ib_create_socket(
	IN	OUT			struct ibsp_socket_info		*socket_info)
{
	ib_qp_create_t			qp_create;
	ib_api_status_t			status;
	ib_qp_attr_t			qp_attr;

	IBSP_ENTER( IBSP_DBG_EP );

	CL_ASSERT( socket_info != NULL );
	CL_ASSERT( socket_info->port != NULL );
	CL_ASSERT( socket_info->qp == NULL );

	socket_info->hca_pd = socket_info->port->hca->pd;

	/* Get the completion queue and thread info for this socket */
	socket_info->cq_tinfo = ib_acquire_cq_tinfo( socket_info->port->hca );
	if( !socket_info->cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("ib_acquire_cq_tinfo failed\n") );
		return WSAENOBUFS;
	}

	/* Queue pair */
	cl_memclr(&qp_create, sizeof(ib_qp_create_t));
	qp_create.qp_type = IB_QPT_RELIABLE_CONN;
	qp_create.sq_depth = QP_ATTRIB_SQ_DEPTH;
	qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;
	qp_create.sq_sge = QP_ATTRIB_SQ_SGE;
	qp_create.rq_sge = 1;
	qp_create.h_rq_cq = socket_info->cq_tinfo->cq;
	qp_create.h_sq_cq = socket_info->cq_tinfo->cq;
	qp_create.sq_signaled = TRUE;
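	/* sq_signaled = TRUE requests a completion for every send WR;
	 * complete_wq() relies on this to count down send_cnt. */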

	status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,	/* context */
		NULL,	/* async handler */
		&socket_info->qp );
	if( status )
	{
		ib_release_cq_tinfo( socket_info->cq_tinfo );
		IBSP_ERROR_EXIT( (
			"ib_create_qp returned %s\n", ib_get_err_str( status )) );
		return WSAENOBUFS;
	}

	status = ib_query_qp( socket_info->qp, &qp_attr );
	if( status == IB_SUCCESS )
	{
		socket_info->max_inline = min( g_max_inline, qp_attr.sq_max_inline );
	}
	else
	{
		IBSP_ERROR( ("ib_query_qp returned %s\n", ib_get_err_str( status )) );
		socket_info->max_inline = 0;
	}

	STAT_INC( qp_num );

	IBSP_EXIT( IBSP_DBG_EP );
	return 0;
}


void
wait_cq_drain(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	IBSP_ENTER( IBSP_DBG_EP );

	if( socket_info->cq_tinfo == NULL )
	{
		IBSP_EXIT( IBSP_DBG_EP );
		return;
	}

	/* Wait for the QP to be drained. */
	while( socket_info->send_cnt || socket_info->recv_cnt )
	{
		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr_list_count=%d qp state=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 socket_info, cl_qlist_count(&socket_info->wr_list)));

		Sleep(100);
	}

	IBSP_EXIT( IBSP_DBG_EP );
}
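
/*
 * The counters are decremented by complete_wq() on the CQ polling
 * thread, so this loop is a simple poll-and-sleep wait rather than an
 * event wait.
 */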


void
ibsp_dup_overlap_abort(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	LPWSAOVERLAPPED	lpOverlapped = NULL;
	int				error;
	int				ret;
	uint8_t			idx;

	IBSP_ENTER( IBSP_DBG_EP );
	CL_ASSERT( !socket_info->send_cnt && !socket_info->recv_cnt );

	/* Browse the list of all posted overlapped structures
	 * to mark them as aborted. */
	idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;
	if( idx >= QP_ATTRIB_RQ_DEPTH )
		idx += QP_ATTRIB_RQ_DEPTH;

	while( socket_info->dup_cnt )
	{
		lpOverlapped = socket_info->dup_wr[idx].wr.lpOverlapped;

		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p Internal=%d InternalHigh=%d hEvent=%d\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), socket_info, &socket_info->dup_wr[idx], lpOverlapped, lpOverlapped->Internal, lpOverlapped->InternalHigh, lpOverlapped->hEvent));

		lpOverlapped->OffsetHigh = WSAECONNABORTED;
		lpOverlapped->InternalHigh = 0;

		if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
		{
			/* Indicate this operation is complete. The switch will poll
			 * with calls to WSPGetOverlappedResult(). */
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h1_comp_count);

			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

			IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_WQ,
					 ("set internal overlapped=0x%p Internal=%Id OffsetHigh=%d\n",
					  lpOverlapped, lpOverlapped->Internal,
					  lpOverlapped->OffsetHigh));

			lpOverlapped->Internal = 0;
		}
		else
		{
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h0_count);

			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif
			IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_WQ,
					 ("calling lpWPUCompleteOverlappedRequest, overlapped=0x%p OffsetHigh=%d "
					 "InternalHigh=%Id hEvent=%p\n",
					  lpOverlapped, lpOverlapped->OffsetHigh,
					  lpOverlapped->InternalHigh, lpOverlapped->hEvent));

			ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
				socket_info->switch_socket, lpOverlapped,
				lpOverlapped->OffsetHigh, (DWORD)lpOverlapped->InternalHigh, &error );

			if( ret != 0 )
			{
				IBSP_ERROR( ("lpWPUCompleteOverlappedRequest failed with %d/%d\n", ret, error) );
			}
		}

		/* Advance to the next duplicated WR, wrapping at the RQ depth. */
		if( ++idx == QP_ATTRIB_RQ_DEPTH )
			idx = 0;

		cl_atomic_dec( &socket_info->dup_cnt );
	}

	IBSP_EXIT( IBSP_DBG_EP );
}


/* Closes a connection and releases its resources. */
void
shutdown_and_destroy_socket_info(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	enum ibsp_socket_state	old_state;

	IBSP_ENTER( IBSP_DBG_EP );

	cl_spinlock_acquire( &socket_info->mutex1 );
	old_state = socket_info->socket_state;
	IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CLOSED );
	cl_spinlock_release( &socket_info->mutex1 );

	if( socket_info->listen.handle )
	{
		/* Stop listening and reject queued connections. */
		ib_listen_cancel( socket_info );
	}

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	cl_qlist_remove_item( &g_ibsp.socket_info_list, &socket_info->item );

	switch( old_state )
	{
	case IBSP_CREATE:
	case IBSP_LISTEN:
		/* Nothing to do. */
		break;

	case IBSP_CONNECTED:
		{
			struct disconnect_reason reason;

			memset( &reason, 0, sizeof(reason) );
			reason.type = DISC_SHUTDOWN;
			ib_disconnect( socket_info, &reason );
		}
		/* Fall through. */

	case IBSP_CONNECT:
	case IBSP_DISCONNECTED:
		/* We changed the state - remove from connection map. */
		CL_ASSERT( socket_info->conn_item.p_map );
		cl_rbmap_remove_item( &g_ibsp.conn_map, &socket_info->conn_item );
		break;
	}
	cl_spinlock_release( &g_ibsp.socket_info_mutex );

	/* Flush all completions. */
	if( socket_info->dup_cnt )
		ibsp_dup_overlap_abort( socket_info );

	while( socket_info->send_cnt || socket_info->recv_cnt )
		ib_cq_comp( socket_info->cq_tinfo );

	ibsp_dereg_socket( socket_info );

	ib_destroy_socket( socket_info );

#ifdef IBSP_LOGGING
	DataLogger_Shutdown(&socket_info->SendDataLogger);
	DataLogger_Shutdown(&socket_info->RecvDataLogger);
#endif

	/* Release the initial reference and clean up. */
	deref_socket_info( socket_info );

	IBSP_EXIT( IBSP_DBG_EP );
}


boolean_t
ibsp_conn_insert(
	IN				struct ibsp_socket_info		*s )
{
	struct ibsp_socket_info	*p_sock;
	cl_rbmap_item_t			*p_item, *p_insert_at;
	boolean_t				left = TRUE;

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	p_item = cl_rbmap_root( &g_ibsp.conn_map );
	p_insert_at = p_item;

	CL_ASSERT( !s->conn_item.p_map );
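	/* Walk the red-black map, comparing keys lexicographically:
	 * local sin_family, sin_addr, sin_port, then the same three peer
	 * fields.  Falling off the tree means the key is unique; an exact
	 * match means the connection already exists. */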
	while( p_item != cl_rbmap_end( &g_ibsp.conn_map ) )
	{
		p_insert_at = p_item;
		p_sock = PARENT_STRUCT( p_item, struct ibsp_socket_info, conn_item );
		if( p_sock->local_addr.sin_family < s->local_addr.sin_family )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_family > s->local_addr.sin_family )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->local_addr.sin_addr.S_un.S_addr < s->local_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_addr.S_un.S_addr > s->local_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->local_addr.sin_port < s->local_addr.sin_port )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_port > s->local_addr.sin_port )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_family < s->peer_addr.sin_family )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_family > s->peer_addr.sin_family )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_addr.S_un.S_addr < s->peer_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_addr.S_un.S_addr > s->peer_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_port < s->peer_addr.sin_port )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_port > s->peer_addr.sin_port )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else
			goto done;
	}

	cl_rbmap_insert( &g_ibsp.conn_map, p_insert_at, &s->conn_item, left );

done:
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
	return p_item == cl_rbmap_end( &g_ibsp.conn_map );
}


void
ibsp_conn_remove(
	IN				struct ibsp_socket_info		*s )
{
	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( s->conn_item.p_map );
	cl_rbmap_remove_item( &g_ibsp.conn_map, &s->conn_item );
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
}