/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include "ibspdll.h"

#ifdef PERFMON_ENABLED
#include "ibsp_perfmon.h"
#endif


typedef struct _io_comp_info
{
	struct ibsp_socket_info	*p_socket;
	LPWSAOVERLAPPED			p_ov;

} io_comp_info_t;
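
/*
 * complete_wq() fills in one io_comp_info_t per decoded completion;
 * ib_cq_comp() then makes the lpWPUCompleteOverlappedRequest upcall for
 * each entry (when p_ov is non-NULL) after the whole batch has been
 * polled, keeping switch upcalls out of the completion-decoding path.
 */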


/* Work queue entry completion routine. */
static void
complete_wq(
	IN		const	ib_wc_t						*wc,
		OUT			io_comp_info_t				*p_io_info )
{
	struct _wr				*wr = NULL;
	struct _recv_wr			*p_recv_wr = NULL;
	LPWSAOVERLAPPED			lpOverlapped = NULL;
	struct ibsp_socket_info	*socket_info = NULL;

	IBSP_ENTER( IBSP_DBG_IO );

	wr = (struct _wr * __ptr64)wc->wr_id;
	p_recv_wr = (struct _recv_wr * __ptr64)wc->wr_id;

	CL_ASSERT( wr );

	socket_info = wr->socket_info;
	p_io_info->p_socket = socket_info;

	lpOverlapped = wr->lpOverlapped;

	IBSP_TRACE4( IBSP_DBG_IO,
		("socket %p, ov %p, work completion status=%s, wc_type=%s\n",
		socket_info, lpOverlapped, ib_get_wc_status_str( wc->status ),
		ib_get_wc_type_str( wc->wc_type )) );
	/* Set the Windows error code. There is no clean correspondence
	 * between the IBAL error codes and Windows error codes, but it
	 * probably does not matter, as long as an error is returned. */
	switch( wc->status )
	{
	case IB_WCS_SUCCESS:
		/*
		 * Set the length of the operation. Under InfiniBand, the work
		 * completion length is only valid for a receive operation, but
		 * fortunately we already recorded the length when posting the
		 * send.
		 *
		 * lpWPUCompleteOverlappedRequest is supposed to store the length
		 * into InternalHigh; however, it will not be called if the low
		 * order bit of lpOverlapped->hEvent is set. So we do it here and
		 * hope for the best.
		 *
		 * NOTE: Without a valid length, the switch doesn't seem to call
		 * GetOverlappedResult() even if we call lpWPUCompleteOverlappedRequest().
		 */
		switch( wc->wc_type )
		{
		case IB_WC_RECV:
			lpOverlapped->InternalHigh = wc->length;
#ifdef IBSP_LOGGING
			cl_spinlock_acquire( &socket_info->recv_lock );
			DataLogger_WriteData(&socket_info->RecvDataLogger,
				p_recv_wr->idx, (void * __ptr64)p_recv_wr->ds_array[0].vaddr,
				wc->length);
			cl_spinlock_release( &socket_info->recv_lock );
#endif
#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_RECV] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_RECV],
				lpOverlapped->InternalHigh );
#endif
			break;
#ifdef PERFMON_ENABLED

		case IB_WC_RDMA_READ:
			lpOverlapped->InternalHigh = wc->length;
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_RECV] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_READ],
				lpOverlapped->InternalHigh );
			break;

		case IB_WC_SEND:
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_SEND] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_SEND],
				lpOverlapped->InternalHigh );
			break;
		case IB_WC_RDMA_WRITE:
			InterlockedIncrement64( &g_pm_stat.pdata[COMP_SEND] );
			InterlockedExchangeAdd64( &g_pm_stat.pdata[BYTES_WRITE],
				lpOverlapped->InternalHigh );
			break;

		default:
			break;
		}

#endif /* PERFMON_ENABLED */

		lpOverlapped->OffsetHigh = 0;
		break;

	case IB_WCS_WR_FLUSHED_ERR:
		cl_spinlock_acquire( &socket_info->mutex );

		if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE &&
			wc->wc_type == IB_WC_RECV )
		{
			/*
			 * Take the wr off the wr_list, and place onto the
			 * dup_wr_list.  We will post them later on the new QP.
			 */
			cl_spinlock_acquire( &socket_info->recv_lock );

			/* Copy to the duplicate WR array. */
			socket_info->dup_wr[socket_info->dup_idx] = *p_recv_wr;

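			/*
			 * Advance dup_idx modulo QP_ATTRIB_RQ_DEPTH. For the expected
			 * power-of-two depths the wrap is a simple mask (e.g. a depth
			 * of 16 masks with 0x0f); otherwise fall back to
			 * compare-and-reset.
			 */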
#if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \
	QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \
	QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8
			socket_info->dup_idx++;
			socket_info->dup_idx &= (QP_ATTRIB_RQ_DEPTH - 1);
#else
			if( ++socket_info->dup_idx == QP_ATTRIB_RQ_DEPTH )
				socket_info->dup_idx = 0;
#endif

			cl_atomic_inc( &socket_info->dup_cnt );
			/* ib_cq_comp will decrement the receive count. */
			cl_atomic_dec( &socket_info->recv_cnt );

			cl_spinlock_release( &socket_info->recv_lock );

			cl_spinlock_release( &socket_info->mutex );
			p_io_info->p_ov = NULL;
			IBSP_EXIT( IBSP_DBG_IO );
			return;
		}

		/* Check whether the receive buffers were flushed on purpose. */
		if( socket_info->socket_state == IBSP_DUPLICATING_OLD )
			wr->lpOverlapped->OffsetHigh = 0;
		else
			wr->lpOverlapped->OffsetHigh = WSA_OPERATION_ABORTED;

		cl_spinlock_release( &socket_info->mutex );

		/* Override the length, as per the WSD specs. */
		wr->lpOverlapped->InternalHigh = 0;
		break;

	case IB_WCS_LOCAL_LEN_ERR:
	case IB_WCS_LOCAL_OP_ERR:
	case IB_WCS_LOCAL_PROTECTION_ERR:
	case IB_WCS_MEM_WINDOW_BIND_ERR:
	case IB_WCS_REM_ACCESS_ERR:
	case IB_WCS_REM_OP_ERR:
	case IB_WCS_RNR_RETRY_ERR:
	case IB_WCS_TIMEOUT_RETRY_ERR:
	case IB_WCS_REM_INVALID_REQ_ERR:
	default:
		IBSP_ERROR( ("%s error: %s\n",
			ib_get_wc_type_str( wc->wc_type ),
			ib_get_wc_status_str( wc->status )) );
		lpOverlapped->OffsetHigh = WSAECONNABORTED;
		wr->lpOverlapped->InternalHigh = 0;
		socket_info->qp_error = WSAECONNABORTED;
		break;
	}

#ifdef PERFMON_ENABLED
	InterlockedIncrement64( &g_pm_stat.pdata[COMP_TOTAL] );
#endif

#ifdef _DEBUG_
	if( wc->wc_type == IB_WC_RECV )
	{
		// This code requires the recv count to be decremented here, but it needs
		// to be decremented after any callbacks are invoked so socket destruction
		// gets delayed until all callbacks have been invoked.
		//{
		//	uint8_t idx;

		//	cl_spinlock_acquire( &socket_info->recv_lock );
		//	idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;
		//	if( idx >= QP_ATTRIB_RQ_DEPTH )
		//		idx += QP_ATTRIB_RQ_DEPTH;

		//	CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->recv_wr[idx] );
		//	cl_atomic_dec( &socket_info->recv_cnt );
		//	cl_spinlock_release( &socket_info->recv_lock );
		//}

		if( wc->status == IB_WCS_SUCCESS && p_recv_wr->ds_array[0].length >= 40 )
		{
			debug_dump_buffer( IBSP_DBG_WQ | IBSP_DBG_LEVEL4, "RECV",
				(void * __ptr64)p_recv_wr->ds_array[0].vaddr, 40 );
		}

		cl_atomic_dec( &g_ibsp.recv_count );
		cl_atomic_inc( &socket_info->recv_comp );

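		/* Poison the completed WR in debug builds so any use after
		 * completion shows up as a recognizable 0x33 pattern. */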
		memset( p_recv_wr, 0x33, sizeof(struct _recv_wr) );
	}
	else
	{
		// This code requires the send count to be decremented here, but it needs
		// to be decremented after any callbacks are invoked so socket destruction
		// gets delayed until all callbacks have been invoked.
		//{
		//	uint8_t idx;

		//	cl_spinlock_acquire( &socket_info->send_lock );
		//	idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;
		//	if( idx >= QP_ATTRIB_SQ_DEPTH )
		//		idx += QP_ATTRIB_SQ_DEPTH;
		//	CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->send_wr[idx] );
		//	cl_atomic_dec( &socket_info->send_cnt );
		//	cl_spinlock_release( &socket_info->send_lock );
		//}

		if( wc->wc_type == IB_WC_SEND )
		{
			cl_atomic_dec( &g_ibsp.send_count );
			cl_atomic_inc( &socket_info->send_comp );

			fzprint(("%s():%d:0x%x:0x%x: send_count=%d\n",
				__FUNCTION__,
				__LINE__, GetCurrentProcessId(), GetCurrentThreadId(), g_ibsp.send_count));
		}

		memset( wr, 0x33, sizeof(struct _wr) );
	}
#endif

	IBSP_TRACE4( IBSP_DBG_IO,
		("overlapped=%p, InternalHigh=%d, hEvent=%x\n",
		lpOverlapped, lpOverlapped->InternalHigh,
		(uintptr_t) lpOverlapped->hEvent) );

	/* Don't notify the switch of this completion if it asked to poll for
	 * the result itself: the switch signals this by setting the low-order
	 * bit of lpOverlapped->hEvent, in which case it retrieves the result
	 * with WSPGetOverlappedResult() instead of expecting an upcall.
	 */
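	/*
	 * Illustrative sketch only (it is the switch, not this provider,
	 * that does the tagging): a request marked for polling would look
	 * roughly like
	 *
	 *	lpOverlapped->hEvent = (WSAEVENT)((uintptr_t)hEvent | 0x1);
	 *
	 * and the test below simply inspects that low-order flag bit.
	 */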
	if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
	{
		/* Indicate this operation is complete. The switch will poll
		 * with calls to WSPGetOverlappedResult(). */

#ifdef _DEBUG_
		cl_atomic_dec( &g_ibsp.overlap_h1_comp_count );

		fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(),
				 GetCurrentThreadId(), lpOverlapped,
				 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
				 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

		IBSP_TRACE1( IBSP_DBG_IO,
			("Not calling lpWPUCompleteOverlappedRequest: "
			"socket=%p, ov=%p OffsetHigh=%d, InternalHigh=%d hEvent=%p\n",
			socket_info, lpOverlapped, lpOverlapped->OffsetHigh,
			lpOverlapped->InternalHigh, lpOverlapped->hEvent) );

		lpOverlapped->Internal = 0;
		p_io_info->p_ov = NULL;
	}
	else
	{
#ifdef _DEBUG_
		cl_atomic_dec( &g_ibsp.overlap_h0_count );

		fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(),
				 GetCurrentThreadId(), lpOverlapped,
				 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
				 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

		p_io_info->p_ov = lpOverlapped;
		cl_atomic_inc( &socket_info->ref_cnt );
	}

	if( wc->wc_type == IB_WC_RECV )
	{
		cl_atomic_dec( &socket_info->recv_cnt );
	}
	else
	{
		cl_atomic_dec( &socket_info->send_cnt );
	}

	IBSP_EXIT( IBSP_DBG_IO );
}


/* CQ completion handler. */
int
ib_cq_comp(
					void						*cq_context )
{
	struct cq_thread_info	*cq_tinfo = cq_context;
	ib_api_status_t			status;
	ib_wc_t					wclist[WC_LIST_SIZE];
	ib_wc_t					*free_wclist;
	ib_wc_t					*done_wclist;
	io_comp_info_t			info[WC_LIST_SIZE];
	int						cb_idx;
	int						i;
	int						n_comp = 0;
#ifdef _DEBUG_
	int						comp_count;
#endif

	IBSP_ENTER( IBSP_DBG_WQ );

	CL_ASSERT( WC_LIST_SIZE >= 1 );

	do
	{
		/* Try to retrieve up to WC_LIST_SIZE completions at a time. */
		for( i = 0; i < (WC_LIST_SIZE - 1); i++ )
		{
			wclist[i].p_next = &wclist[i + 1];
		}
		wclist[(WC_LIST_SIZE - 1)].p_next = NULL;

		free_wclist = &wclist[0];
		done_wclist = NULL;

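		/*
		 * ib_poll_cq() consumes entries from the chained free list and
		 * returns completed work requests chained on done_wclist. If it
		 * comes back with free_wclist == NULL, every free entry was used
		 * and more completions may still be queued, so the enclosing
		 * loop polls again (see the while condition below).
		 */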
		status = ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist );

		IBSP_TRACE( IBSP_DBG_WQ,
			("%s():%d:0x%x:0x%x: poll CQ got status %d, free=%p, done=%p\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
			status, free_wclist, done_wclist) );

		switch( status )
		{
		case IB_NOT_FOUND:
		case IB_SUCCESS:
			break;

		case IB_INVALID_CQ_HANDLE:
			/* This happens when the switch closes the socket while the
			 * execution thread was calling lpWPUCompleteOverlappedRequest. */
			IBSP_ERROR(
				("ib_poll_cq returned IB_INVALID_CQ_HANDLE\n") );
			goto done;

		default:
			IBSP_ERROR(
				("ib_poll_cq failed with %s\n", ib_get_err_str( status )) );
			break;
		}

#ifdef _DEBUG_
		comp_count = 0;
#endif

		/* We have some completions. */
		cb_idx = 0;
		while( done_wclist )
		{
#ifdef _DEBUG_
			comp_count++;
#endif
			complete_wq( done_wclist, &info[cb_idx++] );

			done_wclist = done_wclist->p_next;
		}

		for( i = 0; i < cb_idx; i++ )
		{
			int error;
			int ret;

			if( info[i].p_ov )
			{
				IBSP_TRACE1( IBSP_DBG_IO,
					("Calling lpWPUCompleteOverlappedRequest: "
					"socket=%p, ov=%p OffsetHigh=%d "
					"InternalHigh=%d hEvent=%p\n",
					info[i].p_socket, info[i].p_ov, info[i].p_ov->OffsetHigh,
					info[i].p_ov->InternalHigh, info[i].p_ov->hEvent) );

				ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
					info[i].p_socket->switch_socket, info[i].p_ov,
					info[i].p_ov->OffsetHigh,
					(DWORD)info[i].p_ov->InternalHigh, &error );
				if( ret != 0 )
				{
					IBSP_ERROR( ("WPUCompleteOverlappedRequest for ov=%p "
						"returned %d err %d\n", info[i].p_ov, ret, error) );
				}
				deref_socket_info( info[i].p_socket );
			}
		}

		n_comp += i;

#ifdef _DEBUG_
		if( comp_count > g_ibsp.max_comp_count )
		{
			g_ibsp.max_comp_count = comp_count;
		}
#endif
	} while( !free_wclist );

done:

#ifdef _DEBUG_
	fzprint(("%s():%d:0x%x:0x%x: overlap_h0_count=%d overlap_h1_count=%d\n",
			 __FUNCTION__,
			 __LINE__, GetCurrentProcessId(),
			 GetCurrentThreadId(), g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count));
#endif

	IBSP_EXIT( IBSP_DBG_WQ );
	return n_comp;
}


/* IB completion thread */
static DWORD WINAPI
ib_cq_thread(
					LPVOID						lpParameter )
{
	struct cq_thread_info	*cq_tinfo = (struct cq_thread_info *)lpParameter;
	cl_status_t				cl_status;
	ib_api_status_t			status;
	int						i;

	IBSP_ENTER( IBSP_DBG_HW );

	fzprint(("%s():%d:0x%x:0x%x: cq_tinfo=0x%p\n", __FUNCTION__,
			 __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo));

	do
	{
		cl_status = cl_waitobj_wait_on( cq_tinfo->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
		if( cl_status != CL_SUCCESS )
		{
			IBSP_ERROR(
				("cl_waitobj_wait_on() (%d)\n", cl_status) );
		}

		/*
		 * TODO: By rearranging thread creation and cq creation, this check
		 * may be eliminated.
		 */
		if( cq_tinfo->cq != NULL )
		{
			fzprint(("%s():%d:0x%x:0x%x: Calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

#ifdef PERFMON_ENABLED
			InterlockedIncrement64( &g_pm_stat.pdata[INTR_TOTAL] );
#endif
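			/*
			 * Poll greedily before rearming: restart the g_max_poll
			 * countdown every time ib_cq_comp() finds work, so a burst
			 * of traffic is drained on one event signal instead of one
			 * rearm/wakeup per completion.
			 */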
			i = g_max_poll;
			do
			{
				if( ib_cq_comp( cq_tinfo ) )
					i = g_max_poll;

			} while( i-- );

			fzprint(("%s():%d:0x%x:0x%x: Done calling ib_cq_comp().\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

			status = ib_rearm_cq( cq_tinfo->cq, FALSE );
			if( status != IB_SUCCESS )
			{
				IBSP_ERROR(
					("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
			}
		}

	} while( !cq_tinfo->ib_cq_thread_exit_wanted );

	cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		IBSP_ERROR(
			("cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status)) );
	}
	HeapFree( g_ibsp.heap, 0, cq_tinfo );

	/* No special exit code, even on errors. */
	IBSP_EXIT( IBSP_DBG_HW );
	ExitThread( 0 );
}


/* Called with the HCA's CQ lock held. */
static struct cq_thread_info *
ib_alloc_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info *cq_tinfo = NULL;
	ib_cq_create_t cq_create;
	ib_api_status_t status;
	cl_status_t cl_status;

	IBSP_ENTER( IBSP_DBG_HW );

	cq_tinfo = HeapAlloc(
		g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );

	if( !cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("HeapAlloc() Failed.\n") );
		return NULL;
	}

	cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );
	if( cl_status != CL_SUCCESS )
	{
		cq_tinfo->cq_waitobj = NULL;
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status)) );
		return NULL;
	}

	cq_tinfo->hca = hca;
	cq_tinfo->ib_cq_thread_exit_wanted = FALSE;

	cq_tinfo->ib_cq_thread = CreateThread( NULL, 0, ib_cq_thread, cq_tinfo, 0,
		(LPDWORD)&cq_tinfo->ib_cq_thread_id );

	if( cq_tinfo->ib_cq_thread == NULL )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT( ("CreateThread failed (%d)\n", GetLastError()) );
		return NULL;
	}

	STAT_INC( thread_num );

	/* Completion queue */
	cq_create.size = IB_CQ_SIZE;

	cq_create.pfn_comp_cb = NULL;
	cq_create.h_wait_obj = cq_tinfo->cq_waitobj;

	status = ib_create_cq( hca->hca_handle, &cq_create, cq_tinfo,
		NULL, &cq_tinfo->cq );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_create_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	STAT_INC( cq_num );

	status = ib_rearm_cq( cq_tinfo->cq, FALSE );
	if( status )
	{
		ib_destroy_cq_tinfo( cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
		return NULL;
	}

	cq_tinfo->cqe_size = IB_CQ_SIZE;

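	/*
	 * Each HCA keeps its CQ/thread pairs on an intrusive circular list;
	 * hca->cq_tinfo always points at the "primary" entry, the one new QPs
	 * attach to first. Link the new entry in (or make it the sole,
	 * self-linked entry), then promote it to primary.
	 */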
	if( hca->cq_tinfo )
	{
		__cl_primitive_insert(
			&hca->cq_tinfo->list_item, &cq_tinfo->list_item );
	}
	else
	{
		/* Setup the list entry to point to itself. */
		cq_tinfo->list_item.p_next = &cq_tinfo->list_item;
		cq_tinfo->list_item.p_prev = &cq_tinfo->list_item;
	}

	/* Upon allocation, the new CQ becomes the primary. */
	hca->cq_tinfo = cq_tinfo;

	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}


void
ib_destroy_cq_tinfo(
					struct cq_thread_info		*cq_tinfo )
{
	ib_wc_t wclist;
	ib_wc_t *free_wclist;
	ib_wc_t *done_wclist;
	ib_api_status_t status;
	HANDLE h_cq_thread;

	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( cq_tinfo );
	CL_ASSERT( cq_tinfo->qp_count == 0 );

	if( cq_tinfo->cq )
	{
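		/* Drain and discard anything still queued on the CQ, one entry
		 * at a time, until ib_poll_cq stops returning IB_SUCCESS, so no
		 * stale completions survive into ib_destroy_cq(). */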
		wclist.p_next = NULL;
		free_wclist = &wclist;

		while( ib_poll_cq(
			cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )
		{
			IBSP_TRACE1( IBSP_DBG_WQ,
				("free=%p, done=%p\n", free_wclist, done_wclist) );
		}

		IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() start..\n") );

		/*
		 * Called from cleanup thread, okay to block.
		 */
		status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );
		if( status )
		{
			IBSP_ERROR(
				("ib_destroy_cq returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() finished.\n") );

			cq_tinfo->cq = NULL;

			STAT_DEC( cq_num );
		}
	}

	if( cq_tinfo->ib_cq_thread )
	{
		/* ib_cq_thread() will free the cq_tinfo before it exits; don't
		   reference cq_tinfo after signaling the wait object. */
		h_cq_thread = cq_tinfo->ib_cq_thread;
		cq_tinfo->ib_cq_thread = NULL;

		cq_tinfo->ib_cq_thread_exit_wanted = TRUE;
		cl_waitobj_signal( cq_tinfo->cq_waitobj );

		/* Wait for ib_cq_thread to die, if we are not running on it */
		if( GetCurrentThreadId() != cq_tinfo->ib_cq_thread_id )
		{
			fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cq_thread=0x%x to die\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
					 cq_tinfo->ib_cq_thread_id ));
			if( WaitForSingleObject( h_cq_thread, INFINITE ) != WAIT_OBJECT_0 )
			{
				IBSP_ERROR( ("WaitForSingleObject failed\n") );
			}
			else
			{
				STAT_DEC( thread_num );
			}
		}
		else
		{
			fzprint(("%s():%d:0x%x:0x%x: Currently on ib_cq_thread.\n", __FUNCTION__,
					 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
			STAT_DEC( thread_num );
		}
		CloseHandle( h_cq_thread );
	}
	else
	{
		/* No thread was created; destroy the wait object and free the
		   memory directly. */
		if( cq_tinfo->cq_waitobj )
		{
			cl_waitobj_destroy( cq_tinfo->cq_waitobj );
			cq_tinfo->cq_waitobj = NULL;
		}
		HeapFree( g_ibsp.heap, 0, cq_tinfo );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}


static struct cq_thread_info *
ib_acquire_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info	*cq_tinfo = NULL;
	uint32_t				cqe_size;
	ib_api_status_t			status;

	IBSP_ENTER( IBSP_DBG_HW );

	cl_spinlock_acquire( &hca->cq_lock );

	if( !hca->cq_tinfo )
	{
		cq_tinfo = ib_alloc_cq_tinfo( hca );
		if( !cq_tinfo )
		{
			IBSP_ERROR_EXIT( ("ib_alloc_cq_tinfo() failed\n") );
			cl_spinlock_release( &hca->cq_lock );
			return (NULL);
		}
	}
	else
	{
		cq_tinfo = hca->cq_tinfo;
	}

	CL_ASSERT( cq_tinfo != NULL );

	cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;

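	/*
	 * The shared CQ is provisioned at IB_CQ_SIZE entries per attached QP.
	 * If adding this QP would exceed the current capacity, try to grow
	 * the CQ in place; when the HCA refuses (IB_INVALID_CQ_SIZE), fall
	 * back to allocating a fresh CQ/thread pair for the new QP.
	 */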
	if( cq_tinfo->cqe_size < cqe_size )
	{
		status = ib_modify_cq( cq_tinfo->cq, &cqe_size );
		switch( status )
		{
		case IB_INVALID_CQ_SIZE:
			cq_tinfo = ib_alloc_cq_tinfo( hca );
			if( !cq_tinfo )
				break;

			cq_tinfo->qp_count++;
			break;

		case IB_SUCCESS:
			cq_tinfo->cqe_size = cqe_size;

			cq_tinfo->qp_count++;

			fzprint(("%s():%d:0x%x:0x%x: New cq size=%d.\n",
					 __FUNCTION__,
					 __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), cq_tinfo->cqe_size));
			break;

		default:
			IBSP_ERROR_EXIT(
				("ib_modify_cq() returned %s\n", ib_get_err_str(status)) );
			cq_tinfo = NULL;
		}
	}
	else
	{
		cq_tinfo->qp_count++;
	}

	cl_spinlock_release( &hca->cq_lock );
	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}

void
ib_release_cq_tinfo(
					struct cq_thread_info		*cq_tinfo )
{
	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( cq_tinfo );
	CL_ASSERT( cq_tinfo->hca );

	cl_spinlock_acquire( &cq_tinfo->hca->cq_lock );
	/* If this CQ now has fewer QPs than the primary, make it the primary. */
	if( --cq_tinfo->qp_count < cq_tinfo->hca->cq_tinfo->qp_count )
		cq_tinfo->hca->cq_tinfo = cq_tinfo;
	cl_spinlock_release( &cq_tinfo->hca->cq_lock );

	IBSP_EXIT( IBSP_DBG_HW );
}

/* Release IB resources. */
void
ib_release(void)
{
	cl_fmap_item_t			*p_item;

	IBSP_ENTER( IBSP_DBG_HW );

	if( g_ibsp.al_handle )
	{
		cl_list_item_t *item;
		ib_api_status_t status;

		unregister_pnp();

		while( (item = cl_qlist_head( &g_ibsp.hca_list )) != cl_qlist_end( &g_ibsp.hca_list ) )
		{
			struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);

			pnp_ca_remove( hca );
		}

		fzprint(("%s():%d:0x%x:0x%x: Calling ib_close_al...\n", __FUNCTION__,
				 __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));

		status = ib_close_al( g_ibsp.al_handle );

		fzprint(("%s():%d:0x%x:0x%x: Done calling ib_close_al, status=%d.\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 status));
		if( status != IB_SUCCESS )
		{
			IBSP_ERROR(
				("ib_close_al returned %s\n", ib_get_err_str( status )) );
		}
		else
		{
			IBSP_TRACE( IBSP_DBG_HW, ("ib_close_al success\n") );
			STAT_DEC( al_num );
		}
		g_ibsp.al_handle = NULL;
	}

	for( p_item = cl_fmap_head( &g_ibsp.ip_map );
		p_item != cl_fmap_end( &g_ibsp.ip_map );
		p_item = cl_fmap_head( &g_ibsp.ip_map ) )
	{
		cl_fmap_remove_item( &g_ibsp.ip_map, p_item );

		HeapFree( g_ibsp.heap, 0,
			PARENT_STRUCT(p_item, struct ibsp_ip_addr, item) );
	}

	IBSP_EXIT( IBSP_DBG_HW );
}

/* Initialize IB resources. */
int
ibsp_initialize(void)
{
	ib_api_status_t status;
	int ret;

	IBSP_ENTER( IBSP_DBG_HW );

	CL_ASSERT( g_ibsp.al_handle == NULL );
	CL_ASSERT( cl_qlist_count( &g_ibsp.hca_list ) == 0 );

	/* Open the IB library */
	status = ib_open_al( &g_ibsp.al_handle );

	IBSP_TRACE( IBSP_DBG_HW, ("open is %d %p\n", status, g_ibsp.al_handle) );

	if( status != IB_SUCCESS )
	{
		IBSP_ERROR( ("ib_open_al failed (%d)\n", status) );
		ret = WSAEPROVIDERFAILEDINIT;
		goto done;
	}

	STAT_INC( al_num );

	/* Register for PNP events */
	status = register_pnp();
	if( status )
	{
		IBSP_ERROR( ("register_pnp failed (%d)\n", status) );
		ret = WSAEPROVIDERFAILEDINIT;
		goto done;
	}

	STAT_INC( thread_num );

	ret = 0;
done:
	if( ret )
	{
		/* Free up resources. */
		ib_release();
	}

	IBSP_EXIT( IBSP_DBG_HW );

	return ret;
}

/* Destroys the InfiniBand resources of a socket. */
void
ib_destroy_socket(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	ib_api_status_t status;

	IBSP_ENTER( IBSP_DBG_EP );

	if( socket_info->qp )
	{
		cl_atomic_inc( &socket_info->ref_cnt );
		status = ib_destroy_qp( socket_info->qp, deref_socket_info );
		if( status != IB_SUCCESS )
		{
			IBSP_ERROR( ("ib_destroy_qp returned %s\n",
				ib_get_err_str( status )) );
			deref_socket_info( socket_info );
		}

		ib_release_cq_tinfo( socket_info->cq_tinfo );

		socket_info->qp = NULL;
	}

	IBSP_EXIT( IBSP_DBG_EP );
}

/*
 * Creates the necessary IB resources for a socket.
 */
int
ib_create_socket(
	IN	OUT			struct ibsp_socket_info		*socket_info)
{
	ib_qp_create_t			qp_create;
	ib_api_status_t			status;
	ib_qp_attr_t			qp_attr;

	IBSP_ENTER( IBSP_DBG_EP );

	CL_ASSERT( socket_info != NULL );
	CL_ASSERT( socket_info->port != NULL );
	CL_ASSERT( socket_info->qp == NULL );

	socket_info->hca_pd = socket_info->port->hca->pd;

	/* Get the completion queue and thread info for this socket */
	socket_info->cq_tinfo = ib_acquire_cq_tinfo( socket_info->port->hca );
	if( !socket_info->cq_tinfo )
	{
		IBSP_ERROR_EXIT( ("ib_acquire_cq_tinfo failed\n") );
		return WSAENOBUFS;
	}

	/* Queue pair */
	qp_create.qp_type = IB_QPT_RELIABLE_CONN;
	qp_create.sq_depth = QP_ATTRIB_SQ_DEPTH;
	qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;
	qp_create.sq_sge = QP_ATTRIB_SQ_SGE;
	qp_create.rq_sge = 1;
	qp_create.h_rq_cq = socket_info->cq_tinfo->cq;
	qp_create.h_sq_cq = socket_info->cq_tinfo->cq;
	qp_create.sq_signaled = TRUE;
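	/* sq_signaled = TRUE makes every send WR generate a completion,
	 * which complete_wq() relies on to finish each overlapped send. */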

	status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,	/* context */
		NULL,	/* async handler */
		&socket_info->qp );
	if( status )
	{
		ib_release_cq_tinfo( socket_info->cq_tinfo );
		IBSP_ERROR_EXIT(
			("ib_create_qp returned %s\n", ib_get_err_str( status )) );
		return WSAENOBUFS;
	}

	status = ib_query_qp( socket_info->qp, &qp_attr );
	if( status == IB_SUCCESS )
	{
		socket_info->max_inline = min( g_max_inline, qp_attr.sq_max_inline );
	}
	else
	{
		IBSP_ERROR( ("ib_query_qp returned %s\n", ib_get_err_str( status )) );
		socket_info->max_inline = 0;
	}

	STAT_INC( qp_num );

	IBSP_EXIT( IBSP_DBG_EP );
	return 0;
}

void
wait_cq_drain(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	IBSP_ENTER( IBSP_DBG_EP );

	if( socket_info->cq_tinfo == NULL )
	{
		IBSP_EXIT( IBSP_DBG_EP );
		return;
	}

	/* Wait for the QP to be drained. */
	while( socket_info->send_cnt || socket_info->recv_cnt )
	{
		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr_list_count=%d qp state=%d\n",
				 __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
				 socket_info, cl_qlist_count(&socket_info->wr_list)));

		Sleep(100);
	}

	IBSP_EXIT( IBSP_DBG_EP );
}

void
ibsp_dup_overlap_abort(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	LPWSAOVERLAPPED lpOverlapped = NULL;
	int error;
	int ret;
	uint8_t			idx;

	IBSP_ENTER( IBSP_DBG_EP );
	CL_ASSERT( !socket_info->send_cnt && !socket_info->recv_cnt );

	/* Browse the list of all posted overlapped structures
	 * to mark them as aborted. */
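	/*
	 * idx is a uint8_t, so the subtraction below wraps modulo 256;
	 * adding QP_ATTRIB_RQ_DEPTH once re-normalizes the result into
	 * [0, QP_ATTRIB_RQ_DEPTH) for the power-of-two depths (<= 256)
	 * assumed in complete_wq().
	 */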
	idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;
	if( idx >= QP_ATTRIB_RQ_DEPTH )
		idx += QP_ATTRIB_RQ_DEPTH;

	while( socket_info->dup_cnt )
	{
		lpOverlapped = socket_info->dup_wr[idx].wr.lpOverlapped;

		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p Internal=%d InternalHigh=%d hEvent=%d\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),
			socket_info, &socket_info->dup_wr[idx], lpOverlapped,
			lpOverlapped->Internal, lpOverlapped->InternalHigh, lpOverlapped->hEvent));

		lpOverlapped->OffsetHigh = WSAECONNABORTED;
		lpOverlapped->InternalHigh = 0;

		if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
		{
			/* Indicate this operation is complete. The switch will poll
			 * with calls to WSPGetOverlappedResult(). */
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h1_comp_count);

			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

			CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
					 ("%s: set internal overlapped=0x%p Internal=%d OffsetHigh=%d\n",
					  __FUNCTION__, lpOverlapped, lpOverlapped->Internal,
					  lpOverlapped->OffsetHigh));

			lpOverlapped->Internal = 0;
		}
		else
		{
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h0_count);

			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif
			CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
					 ("%s: calls lpWPUCompleteOverlappedRequest, overlapped=0x%p OffsetHigh=%d InternalHigh=%d hEvent=%d\n",
					  __FUNCTION__, lpOverlapped, lpOverlapped->OffsetHigh,
					  lpOverlapped->InternalHigh, lpOverlapped->hEvent));

			ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(
				socket_info->switch_socket, lpOverlapped,
				lpOverlapped->OffsetHigh, (DWORD) lpOverlapped->InternalHigh, &error );

			if( ret != 0 )
			{
				CL_ERROR(IBSP_DBG_EP, gdbg_lvl,
						 ("lpWPUCompleteOverlappedRequest failed with %d/%d\n", ret,
						  error));
			}
		}
		cl_atomic_dec( &socket_info->dup_cnt );

		/* Advance to the next duplicate WR. */
		if( ++idx == QP_ATTRIB_RQ_DEPTH )
			idx = 0;
	}

	IBSP_EXIT( IBSP_DBG_EP );
}

/* Closes a connection and releases its resources. */
void
shutdown_and_destroy_socket_info(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	enum ibsp_socket_state	old_state;

	IBSP_ENTER( IBSP_DBG_EP );

	cl_spinlock_acquire( &socket_info->mutex );
	old_state = socket_info->socket_state;
	IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CLOSED );
	cl_spinlock_release( &socket_info->mutex );

	if( socket_info->listen.handle )
	{
		/* Stop listening and reject queued connections. */
		ib_listen_cancel( socket_info );
	}

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	cl_qlist_remove_item( &g_ibsp.socket_info_list, &socket_info->item );

	switch( old_state )
	{
	case IBSP_CREATE:
	case IBSP_LISTEN:
		/* Nothing to do. */
		break;

	case IBSP_CONNECTED:
		{
			struct disconnect_reason reason;

			memset( &reason, 0, sizeof(reason) );
			reason.type = DISC_SHUTDOWN;
			ib_disconnect( socket_info, &reason );
		}
		/* Fall through. */

	case IBSP_CONNECT:
	case IBSP_DISCONNECTED:
		/* We changed the state - remove from connection map. */
		CL_ASSERT( socket_info->conn_item.p_map );
		cl_rbmap_remove_item( &g_ibsp.conn_map, &socket_info->conn_item );
		break;
	}
	cl_spinlock_release( &g_ibsp.socket_info_mutex );

	/* Flush all completions. */
	if( socket_info->dup_cnt )
		ibsp_dup_overlap_abort( socket_info );

	while( socket_info->send_cnt || socket_info->recv_cnt )
		ib_cq_comp( socket_info->cq_tinfo );

	ibsp_dereg_socket( socket_info );

	ib_destroy_socket( socket_info );

#ifdef IBSP_LOGGING
	DataLogger_Shutdown(&socket_info->SendDataLogger);
	DataLogger_Shutdown(&socket_info->RecvDataLogger);
#endif

	/* Release the initial reference and clean up. */
	deref_socket_info( socket_info );

	IBSP_EXIT( IBSP_DBG_EP );
}

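/*
 * Insert a socket into the connection map, keyed by the
 * (local family, local address, local port, peer family, peer address,
 * peer port) tuple compared in that order. Returns TRUE if the tuple was
 * unique and the socket was inserted, FALSE if an existing entry already
 * claims the same connection.
 */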
boolean_t
ibsp_conn_insert(
	IN				struct ibsp_socket_info		*s )
{
	struct ibsp_socket_info		*p_sock;
	cl_rbmap_item_t				*p_item, *p_insert_at;
	boolean_t					left = TRUE;

	p_item = cl_rbmap_root( &g_ibsp.conn_map );
	p_insert_at = p_item;

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( !s->conn_item.p_map );
	while( p_item != cl_rbmap_end( &g_ibsp.conn_map ) )
	{
		p_insert_at = p_item;
		p_sock = PARENT_STRUCT( p_item, struct ibsp_socket_info, conn_item );
		if( p_sock->local_addr.sin_family < s->local_addr.sin_family )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_family > s->local_addr.sin_family )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->local_addr.sin_addr.S_un.S_addr < s->local_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_addr.S_un.S_addr > s->local_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->local_addr.sin_port < s->local_addr.sin_port )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->local_addr.sin_port > s->local_addr.sin_port )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_family < s->peer_addr.sin_family )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_family > s->peer_addr.sin_family )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_addr.S_un.S_addr < s->peer_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_addr.S_un.S_addr > s->peer_addr.sin_addr.S_un.S_addr )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else if( p_sock->peer_addr.sin_port < s->peer_addr.sin_port )
			p_item = cl_rbmap_left( p_item ), left = TRUE;
		else if( p_sock->peer_addr.sin_port > s->peer_addr.sin_port )
			p_item = cl_rbmap_right( p_item ), left = FALSE;
		else
			goto done;
	}

	cl_rbmap_insert( &g_ibsp.conn_map, p_insert_at, &s->conn_item, left );

done:
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
	return p_item == cl_rbmap_end( &g_ibsp.conn_map );
}


void
ibsp_conn_remove(
	IN				struct ibsp_socket_info		*s )
{
	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( s->conn_item.p_map );
	cl_rbmap_remove_item( &g_ibsp.conn_map, &s->conn_item );
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
}