87c318e10e67802533e149adcb95ef20d6fbd977
[mirror/winof/.git] / ulp / wsd / user / ibsp_iblow.c
1 /*\r
2  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.\r
3  *\r
4  * This software is available to you under the OpenIB.org BSD license\r
5  * below:\r
6  *\r
7  *     Redistribution and use in source and binary forms, with or\r
8  *     without modification, are permitted provided that the following\r
9  *     conditions are met:\r
10  *\r
11  *      - Redistributions of source code must retain the above\r
12  *        copyright notice, this list of conditions and the following\r
13  *        disclaimer.\r
14  *\r
15  *      - Redistributions in binary form must reproduce the above\r
16  *        copyright notice, this list of conditions and the following\r
17  *        disclaimer in the documentation and/or other materials\r
18  *        provided with the distribution.\r
19  *\r
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
23  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
24  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
25  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
26  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
27  * SOFTWARE.\r
28  *\r
29  * $Id$\r
30  */\r
31 \r
32 #include "ibspdll.h"\r
33 \r
34 \r
/*
 * Per-completion result record produced by complete_wq() and consumed
 * by ib_cq_comp().
 */
typedef struct _io_comp_info
{
	struct ibsp_socket_info *p_socket;	/* Socket that owns the completed work request. */
	LPWSAOVERLAPPED                 p_ov;	/* Overlapped request to complete via
										 * lpWPUCompleteOverlappedRequest, or NULL when
										 * the switch must not be notified. */

} io_comp_info_t;
41 \r
42 \r
43 /* Work queue entry completion routine. */\r
44 static void\r
45 complete_wq(\r
46         IN              const   ib_wc_t                                         *wc,\r
47                 OUT                     io_comp_info_t                          *p_io_info )\r
48 {\r
49         struct _wr                              *wr = NULL;\r
50         struct _recv_wr                 *p_recv_wr = NULL;\r
51         LPWSAOVERLAPPED                 lpOverlapped = NULL;\r
52         struct ibsp_socket_info *socket_info = NULL;\r
53 \r
54         IBSP_ENTER( IBSP_DBG_IO );\r
55 \r
56         wr = (struct _wr * __ptr64)wc->wr_id;\r
57         p_recv_wr = (struct _recv_wr * __ptr64)wc->wr_id;\r
58 \r
59         CL_ASSERT( wr );\r
60 \r
61         socket_info = wr->socket_info;\r
62         p_io_info->p_socket = socket_info;\r
63 \r
64         lpOverlapped = wr->lpOverlapped;\r
65 \r
66         IBSP_TRACE4( IBSP_DBG_IO,\r
67                 ("socket %p, ov %p, work completion status=%s, wc_type=%s\n",\r
68                 socket_info, lpOverlapped, ib_get_wc_status_str( wc->status ),\r
69                 ib_get_wc_type_str( wc->wc_type )) );\r
70 \r
71         /* Set the windows error code. It's not easy to find an easy\r
72          * correspondence between the IBAL error codes and windows error\r
73          * codes; but it probably does not matter, as long as it returns an\r
74          * error. */\r
75         switch( wc->status )\r
76         {\r
77         case IB_WCS_SUCCESS:\r
78                 /*\r
79                  * Set the length of the operation. Under Infiniband, the work\r
80                  * completion length is only valid for a receive\r
81                  * operation. Fortunately we had already set the length during the\r
82                  * send operation. \r
83                  *\r
84                  * lpWPUCompleteOverlappedRequest is supposed to store the length\r
85                  * into InternalHigh, however it will not be called if the low\r
86                  * order bit of lpOverlapped->hEvent is set. So we do it and hope\r
87                  * for the best. \r
88                  *\r
89                  * NOTE: Without a valid length, the switch doesn't seem to call \r
90                  * GetOverlappedResult() even if we call lpWPUCompleteOverlappedRequest()\r
91                  */\r
92                 if( wc->wc_type == IB_WC_RECV )\r
93                 {\r
94                         lpOverlapped->InternalHigh = wc->length;\r
95 \r
96 #ifdef IBSP_LOGGING\r
97                         cl_spinlock_acquire( &socket_info->recv_lock );\r
98                         DataLogger_WriteData(&socket_info->RecvDataLogger,\r
99                                 p_recv_wr->idx, (void * __ptr64)p_recv_wr->ds_array[0].vaddr,\r
100                                 wc->length);\r
101                         cl_spinlock_release( &socket_info->recv_lock );\r
102 #endif\r
103                 }\r
104 \r
105                 lpOverlapped->OffsetHigh = 0;\r
106                 break;\r
107 \r
108         case IB_WCS_WR_FLUSHED_ERR:\r
109                 cl_spinlock_acquire( &socket_info->mutex );\r
110 \r
111                 if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE &&\r
112                         wc->wc_type == IB_WC_RECV )\r
113                 {\r
114                         /*\r
115                          * Take the wr off the wr_list, and place onto the\r
116                          * dup_wr_list.  We will post them later on the new QP. \r
117                          */\r
118                         cl_spinlock_acquire( &socket_info->recv_lock );\r
119 \r
120                         /* Copy to the duplicate WR array. */\r
121                         socket_info->dup_wr[socket_info->dup_idx] = *p_recv_wr;\r
122 \r
123 #if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \\r
124         QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \\r
125         QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8\r
126                         socket_info->dup_idx++;\r
127                         socket_info->dup_idx &= (QP_ATTRIB_RQ_DEPTH - 1);\r
128 #else\r
129                         if( ++socket_info->dup_idx == QP_ATTRIB_RQ_DEPTH )\r
130                                 socket_info->dup_idx = 0;\r
131 #endif\r
132 \r
133                         cl_atomic_inc( &socket_info->dup_cnt );\r
134                         /* ib_cq_comp will decrement the receive count. */\r
135                         cl_atomic_dec( &socket_info->recv_cnt );\r
136 \r
137                         cl_spinlock_release( &socket_info->recv_lock );\r
138 \r
139                         cl_spinlock_release( &socket_info->mutex );\r
140                         p_io_info->p_ov = NULL;\r
141                         IBSP_EXIT( IBSP_DBG_IO );\r
142                         return;\r
143                 }\r
144                 \r
145                 /* Check for flushing the receive buffers on purpose. */\r
146                 if( socket_info->socket_state == IBSP_DUPLICATING_OLD )\r
147                         wr->lpOverlapped->OffsetHigh = 0;\r
148                 else\r
149                         wr->lpOverlapped->OffsetHigh = WSA_OPERATION_ABORTED;\r
150 \r
151                 cl_spinlock_release( &socket_info->mutex );\r
152 \r
153                 /* Override the length, as per the WSD specs. */\r
154                 wr->lpOverlapped->InternalHigh = 0;\r
155                 break;\r
156 \r
157         case IB_WCS_LOCAL_LEN_ERR:\r
158         case IB_WCS_LOCAL_OP_ERR:\r
159         case IB_WCS_LOCAL_PROTECTION_ERR:\r
160         case IB_WCS_MEM_WINDOW_BIND_ERR:\r
161         case IB_WCS_REM_ACCESS_ERR:\r
162         case IB_WCS_REM_OP_ERR:\r
163         case IB_WCS_RNR_RETRY_ERR:\r
164         case IB_WCS_TIMEOUT_RETRY_ERR:\r
165         case IB_WCS_REM_INVALID_REQ_ERR:\r
166         default:\r
167                 IBSP_ERROR( ("%s error: %s\n",\r
168                         ib_get_wc_type_str( wc->wc_type ),\r
169                         ib_get_wc_status_str( wc->status )) );\r
170                 lpOverlapped->OffsetHigh = WSAECONNABORTED;\r
171                 wr->lpOverlapped->InternalHigh = 0;\r
172                 socket_info->qp_error = WSAECONNABORTED;\r
173                 break;\r
174         }\r
175 \r
176 #ifdef _DEBUG_\r
177         if( wc->wc_type == IB_WC_RECV )\r
178         {\r
179                 // This code requires the recv count to be decremented here, but it needs\r
180                 // to be decremented after any callbacks are invoked so socket destruction\r
181                 // gets delayed until all callbacks have been invoked.\r
182                 //{\r
183                 //      uint8_t idx;\r
184 \r
185                 //      cl_spinlock_acquire( &socket_info->recv_lock );\r
186                 //      idx = socket_info->recv_idx - (uint8_t)socket_info->recv_cnt;\r
187                 //      if( idx >= QP_ATTRIB_RQ_DEPTH )\r
188                 //              idx += QP_ATTRIB_RQ_DEPTH;\r
189 \r
190                 //      CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->recv_wr[idx] );\r
191                 //      cl_atomic_dec( &socket_info->recv_cnt );\r
192                 //      cl_spinlock_release( &socket_info->recv_lock );\r
193                 //}\r
194 \r
195                 if( wc->status == IB_SUCCESS && p_recv_wr->ds_array[0].length >= 40 )\r
196                 {\r
197                         debug_dump_buffer( IBSP_DBG_WQ | IBSP_DBG_LEVEL4, "RECV",\r
198                                 (void * __ptr64)p_recv_wr->ds_array[0].vaddr, 40 );\r
199                 }\r
200 \r
201                 cl_atomic_dec( &g_ibsp.recv_count );\r
202                 cl_atomic_inc( &socket_info->recv_comp );\r
203 \r
204                 memset( p_recv_wr, 0x33, sizeof(struct _recv_wr) );\r
205         }\r
206         else\r
207         {\r
208                 // This code requires the send count to be decremented here, but it needs\r
209                 // to be decremented after any callbacks are invoked so socket destruction\r
210                 // gets delayed until all callbacks have been invoked.\r
211                 //{\r
212                 //      uint8_t idx;\r
213 \r
214                 //      cl_spinlock_acquire( &socket_info->send_lock );\r
215                 //      idx = socket_info->send_idx - (uint8_t)socket_info->send_cnt;\r
216                 //      if( idx >= QP_ATTRIB_SQ_DEPTH )\r
217                 //              idx += QP_ATTRIB_SQ_DEPTH;\r
218                 //      CL_ASSERT( wc->wr_id == (uint64_t)(void* __ptr64)&socket_info->send_wr[idx] );\r
219                 //      cl_atomic_dec( &socket_info->send_cnt );\r
220                 //      cl_spinlock_release( &socket_info->send_lock );\r
221                 //}\r
222 \r
223                 if( wc->wc_type == IB_WC_SEND )\r
224                 {\r
225                         cl_atomic_dec( &g_ibsp.send_count );\r
226                         cl_atomic_inc( &socket_info->send_comp );\r
227 \r
228                         fzprint(("%s():%d:0x%x:0x%x: send_count=%d\n",\r
229                                 __FUNCTION__,\r
230                                 __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), g_ibsp.send_count));\r
231                 }\r
232 \r
233                 memset( wr, 0x33, sizeof(struct _wr) );\r
234         }\r
235 #endif\r
236 \r
237         IBSP_TRACE4( IBSP_DBG_IO,\r
238                 ("overlapped=%p, InternalHigh=%d, hEvent=%x\n",\r
239                 lpOverlapped, lpOverlapped->InternalHigh,\r
240                 (uintptr_t) lpOverlapped->hEvent) );\r
241 \r
242         /* Don't notify the switch for that completion only if:\r
243          *   - the switch don't want a notification\r
244          *   - the wq completed with success\r
245          *   - the socket is still connected\r
246          */\r
247         if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )\r
248         {\r
249                 /* Indicate this operation is complete. The switch will poll\r
250                  * with calls to WSPGetOverlappedResult(). */\r
251 \r
252 #ifdef _DEBUG_\r
253                 cl_atomic_dec( &g_ibsp.overlap_h1_comp_count );\r
254 \r
255                 fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",\r
256                                  __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
257                                  GetCurrentThreadId(), lpOverlapped,\r
258                                  g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,\r
259                                  g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));\r
260 #endif\r
261 \r
262                 IBSP_TRACE1( IBSP_DBG_IO,\r
263                         ("Not calling lpWPUCompleteOverlappedRequest: "\r
264                         "socket=%p, ov=%p OffsetHigh=%d, InternalHigh=%d hEvent=%p\n",\r
265                         socket_info, lpOverlapped, lpOverlapped->OffsetHigh,\r
266                         lpOverlapped->InternalHigh, lpOverlapped->hEvent) );\r
267 \r
268                 lpOverlapped->Internal = 0;\r
269                 p_io_info->p_ov = NULL;\r
270         }\r
271         else\r
272         {\r
273 #ifdef _DEBUG_\r
274                 cl_atomic_dec( &g_ibsp.overlap_h0_count );\r
275 \r
276                 fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",\r
277                                  __FUNCTION__, __LINE__, GetCurrentProcessId(),\r
278                                  GetCurrentThreadId(), lpOverlapped,\r
279                                  g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,\r
280                                  g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));\r
281 #endif\r
282 \r
283                 p_io_info->p_ov = lpOverlapped;\r
284                 cl_atomic_inc( &socket_info->ref_cnt );\r
285         }\r
286 \r
287         if( wc->wc_type == IB_WC_RECV )\r
288                 cl_atomic_dec( &socket_info->recv_cnt );\r
289         else\r
290                 cl_atomic_dec( &socket_info->send_cnt );\r
291 \r
292         IBSP_EXIT( IBSP_DBG_IO );\r
293 }\r
294 \r
295 \r
296 /* CQ completion handler. */\r
297 int\r
298 ib_cq_comp(\r
299                                         void                                            *cq_context )\r
300 {\r
301         struct cq_thread_info   *cq_tinfo = cq_context;\r
302         ib_api_status_t                 status;\r
303         ib_wc_t                                 wclist[WC_LIST_SIZE];\r
304         ib_wc_t                                 *free_wclist;\r
305         ib_wc_t                                 *done_wclist;\r
306         io_comp_info_t                  info[WC_LIST_SIZE];\r
307         int                                             cb_idx;\r
308         int                                             i;\r
309         int                                             n_comp = 0;\r
310 #ifdef _DEBUG_\r
311         int                                             comp_count;\r
312 #endif\r
313 \r
314         IBSP_ENTER( IBSP_DBG_WQ );\r
315 \r
316         CL_ASSERT( WC_LIST_SIZE >= 1 );\r
317 \r
318         do\r
319         {\r
320                 /* Try to retrieve up to WC_LIST_SIZE completions at a time. */\r
321                 for( i = 0; i < (WC_LIST_SIZE - 1); i++ )\r
322                 {\r
323                         wclist[i].p_next = &wclist[i + 1];\r
324                 }\r
325                 wclist[(WC_LIST_SIZE - 1)].p_next = NULL;\r
326 \r
327                 free_wclist = &wclist[0];\r
328                 done_wclist = NULL;\r
329 \r
330                 status = ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist );\r
331 \r
332                 IBSP_TRACE( IBSP_DBG_WQ,\r
333                         ("%s():%d:0x%x:0x%x: poll CQ got status %d, free=%p, done=%p\n",\r
334                         __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),\r
335                         status, free_wclist, done_wclist) );\r
336 \r
337                 switch( status )\r
338                 {\r
339                 case IB_NOT_FOUND:\r
340                 case IB_SUCCESS:\r
341                         break;\r
342 \r
343                 case IB_INVALID_CQ_HANDLE:\r
344                         /* This happens when the switch closes the socket while the \r
345                          * execution thread was calling lpWPUCompleteOverlappedRequest. */\r
346                         IBSP_ERROR(\r
347                                 ("ib_poll_cq returned IB_INVLALID_CQ_HANDLE\n") );\r
348                         goto done;\r
349 \r
350                 default:\r
351                         IBSP_ERROR(\r
352                                 ("ib_poll_cq failed returned %s\n", ib_get_err_str( status )) );\r
353                         break;\r
354                 }\r
355 \r
356 #ifdef _DEBUG_\r
357                 comp_count = 0;\r
358 #endif\r
359 \r
360                 /* We have some completions. */\r
361                 cb_idx = 0;\r
362                 while( done_wclist )\r
363                 {\r
364 #ifdef _DEBUG_\r
365                         comp_count++;\r
366 #endif\r
367                         complete_wq( done_wclist, &info[cb_idx++] );\r
368 \r
369                         done_wclist = done_wclist->p_next;\r
370                 }\r
371 \r
372                 for( i = 0; i < cb_idx; i++ )\r
373                 {\r
374                         int error;\r
375                         int ret;\r
376 \r
377                         if( info[i].p_ov )\r
378                         {\r
379                                 IBSP_TRACE1( IBSP_DBG_IO,\r
380                                         ("Calling lpWPUCompleteOverlappedRequest: "\r
381                                         "socket=%p, ov=%p OffsetHigh=%d "\r
382                                         "InternalHigh=%d hEvent=%p\n",\r
383                                         info[i].p_socket, info[i].p_ov, info[i].p_ov->OffsetHigh,\r
384                                         info[i].p_ov->InternalHigh, info[i].p_ov->hEvent) );\r
385 \r
386                                 ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest(\r
387                                         info[i].p_socket->switch_socket, info[i].p_ov,\r
388                                         info[i].p_ov->OffsetHigh,\r
389                                         (DWORD)info[i].p_ov->InternalHigh, &error );\r
390                                 if( ret != 0 )\r
391                                 {\r
392                                         IBSP_ERROR( ("WPUCompleteOverlappedRequest for ov=%p "\r
393                                                 "returned %d err %d\n", info[i].p_ov, ret, error) );\r
394                                 }\r
395                                 deref_socket_info( info[i].p_socket );\r
396                         }\r
397                 }\r
398 \r
399                 n_comp += i;\r
400 \r
401 #ifdef _DEBUG_\r
402                 if( comp_count > g_ibsp.max_comp_count )\r
403                 {\r
404                         g_ibsp.max_comp_count = comp_count;\r
405                 }\r
406 #endif\r
407         } while( !free_wclist );\r
408 \r
409 done:\r
410 \r
411 #ifdef _DEBUG_\r
412         fzprint(("%s():%d:0x%x:0x%x: overlap_h0_count=%d overlap_h1_count=%d\n",\r
413                          __FUNCTION__,\r
414                          __LINE__, GetCurrentProcessId(),\r
415                          GetCurrentThreadId(), g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count));\r
416 #endif\r
417 \r
418         IBSP_EXIT( IBSP_DBG_WQ );\r
419         return n_comp;\r
420 }\r
421 \r
422 \r
423 /* IB completion thread */\r
424 static DWORD WINAPI\r
425 ib_cq_thread(\r
426                                         LPVOID                                          lpParameter )\r
427 {\r
428         struct cq_thread_info   *cq_tinfo = (struct cq_thread_info *)lpParameter;\r
429         cl_status_t                             cl_status;\r
430         ib_api_status_t                 status;\r
431         int                                             i;\r
432 \r
433         IBSP_ENTER( IBSP_DBG_HW );\r
434 \r
435 \r
436         fzprint(("%s():%d:0x%x:0x%x: cq_tinfo=0x%p\n", __FUNCTION__,\r
437                          __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo));\r
438 \r
439         do\r
440         {\r
441                 cl_status = cl_waitobj_wait_on( cq_tinfo->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );\r
442                 if( cl_status != CL_SUCCESS )\r
443                 {\r
444                         IBSP_ERROR(\r
445                                 ("cl_waitobj_wait_on() (%d)\n", cl_status) );\r
446                 }\r
447 \r
448                 /* \r
449                  * TODO: By rearranging thread creation and cq creation, this check\r
450                  * may be eliminated.\r
451                  */\r
452                 if( cq_tinfo->cq != NULL )\r
453                 {\r
454                         fzprint(("%s():%d:0x%x:0x%x: Calling ib_cq_comp().\n", __FUNCTION__,\r
455                                          __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));\r
456 \r
457                         i = g_max_poll;\r
458                         do\r
459                         {\r
460                                 if( ib_cq_comp( cq_tinfo ) )\r
461                                         i = g_max_poll;\r
462 \r
463                         } while( i-- );\r
464 \r
465                         fzprint(("%s():%d:0x%x:0x%x: Done calling ib_cq_comp().\n", __FUNCTION__,\r
466                                          __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));\r
467 \r
468                         status = ib_rearm_cq( cq_tinfo->cq, FALSE );\r
469                         if( status != IB_SUCCESS )\r
470                         {\r
471                                 IBSP_ERROR(\r
472                                         ("ib_rearm_cq returned %s)\n", ib_get_err_str( status )) );\r
473                         }\r
474                 }\r
475 \r
476         } while( !cq_tinfo->ib_cq_thread_exit_wanted );\r
477 \r
478         cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );\r
479         if( cl_status != CL_SUCCESS )\r
480         {\r
481                 IBSP_ERROR(\r
482                         ("cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status)) );\r
483         }\r
484         HeapFree( g_ibsp.heap, 0, cq_tinfo );\r
485 \r
486         /* No special exit code, even on errors. */\r
487         IBSP_EXIT( IBSP_DBG_HW );\r
488         ExitThread( 0 );\r
489 }\r
490 \r
491 \r
492 /* Called with the HCA's CQ lock held. */\r
493 static struct cq_thread_info *\r
494 ib_alloc_cq_tinfo(\r
495                                         struct ibsp_hca                         *hca )\r
496 {\r
497         struct cq_thread_info *cq_tinfo = NULL;\r
498         ib_cq_create_t cq_create;\r
499         ib_api_status_t status;\r
500         cl_status_t cl_status;\r
501 \r
502         IBSP_ENTER( IBSP_DBG_HW );\r
503 \r
504         cq_tinfo = HeapAlloc(\r
505                 g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );\r
506 \r
507         if( !cq_tinfo )\r
508         {\r
509                 IBSP_ERROR_EXIT( ("HeapAlloc() Failed.\n") );\r
510                 return NULL;\r
511         }\r
512 \r
513         cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );\r
514         if( cl_status != CL_SUCCESS )\r
515         {\r
516                 cq_tinfo->cq_waitobj = NULL;\r
517                 ib_destroy_cq_tinfo( cq_tinfo );\r
518                 IBSP_ERROR_EXIT(\r
519                         ("cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status)) );\r
520                 return NULL;\r
521         }\r
522 \r
523         cq_tinfo->hca = hca;\r
524         cq_tinfo->ib_cq_thread_exit_wanted = FALSE;\r
525 \r
526         cq_tinfo->ib_cq_thread = CreateThread( NULL, 0, ib_cq_thread, cq_tinfo, 0, (LPDWORD)&cq_tinfo->ib_cq_thread_id );\r
527 \r
528         if( cq_tinfo->ib_cq_thread == NULL )\r
529         {\r
530                 ib_destroy_cq_tinfo( cq_tinfo );\r
531                 IBSP_ERROR_EXIT( ("CreateThread failed (%d)", GetLastError()) );\r
532                 return NULL;\r
533         }\r
534 \r
535         STAT_INC( thread_num );\r
536 \r
537         /* Completion queue */\r
538         cq_create.size = IB_CQ_SIZE;\r
539 \r
540         cq_create.pfn_comp_cb = NULL;\r
541         cq_create.h_wait_obj = cq_tinfo->cq_waitobj;\r
542 \r
543         status = ib_create_cq( hca->hca_handle, &cq_create, cq_tinfo,\r
544                 NULL, &cq_tinfo->cq );\r
545         if( status )\r
546         {\r
547                 ib_destroy_cq_tinfo( cq_tinfo );\r
548                 IBSP_ERROR_EXIT(\r
549                         ("ib_create_cq returned %s\n", ib_get_err_str( status )) );\r
550                 return NULL;\r
551         }\r
552 \r
553         STAT_INC( cq_num );\r
554 \r
555         status = ib_rearm_cq( cq_tinfo->cq, FALSE );\r
556         if( status )\r
557         {\r
558                 ib_destroy_cq_tinfo( cq_tinfo );\r
559                 IBSP_ERROR_EXIT(\r
560                         ("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );\r
561                 return NULL;\r
562         }\r
563 \r
564         cq_tinfo->cqe_size = IB_CQ_SIZE;\r
565 \r
566         if( hca->cq_tinfo )\r
567         {\r
568                 __cl_primitive_insert(\r
569                         &hca->cq_tinfo->list_item, &cq_tinfo->list_item );\r
570         }\r
571         else\r
572         {\r
573                 /* Setup the list entry to point to itself. */\r
574                 cq_tinfo->list_item.p_next = &cq_tinfo->list_item;\r
575                 cq_tinfo->list_item.p_prev = &cq_tinfo->list_item;\r
576         }\r
577 \r
578         /* Upon allocation, the new CQ becomes the primary. */\r
579         hca->cq_tinfo = cq_tinfo;\r
580 \r
581         IBSP_EXIT( IBSP_DBG_HW );\r
582         return (cq_tinfo);\r
583 }\r
584 \r
585 \r
586 void\r
587 ib_destroy_cq_tinfo(\r
588                                         struct cq_thread_info           *cq_tinfo )\r
589 {\r
590         ib_wc_t wclist;\r
591         ib_wc_t *free_wclist;\r
592         ib_wc_t *done_wclist;\r
593         ib_api_status_t status;\r
594         HANDLE h_cq_thread;\r
595 \r
596         IBSP_ENTER( IBSP_DBG_HW );\r
597 \r
598         CL_ASSERT( cq_tinfo );\r
599         CL_ASSERT( cq_tinfo->qp_count == 0 );\r
600 \r
601         if( cq_tinfo->cq )\r
602         {\r
603                 wclist.p_next = NULL;\r
604                 free_wclist = &wclist;\r
605 \r
606                 while( ib_poll_cq(\r
607                         cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )\r
608                 {\r
609                         IBSP_TRACE1( IBSP_DBG_WQ,\r
610                                 ("free=%p, done=%p\n", free_wclist, done_wclist) );\r
611                 }\r
612 \r
613                 IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() start..\n") );\r
614 \r
615                 /*\r
616                  * Called from cleanup thread, okay to block.\r
617                  */\r
618                 status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );\r
619                 if( status )\r
620                 {\r
621                         IBSP_ERROR(\r
622                                 ("ib_destroy_cq returned %s\n", ib_get_err_str( status )) );\r
623                 }\r
624                 else\r
625                 {\r
626                         IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() finished.\n") );\r
627 \r
628                         cq_tinfo->cq = NULL;\r
629 \r
630                         STAT_DEC( cq_num );\r
631                 }\r
632         }\r
633 \r
634         if( cq_tinfo->ib_cq_thread )\r
635         {\r
636                 /* ib_cq_thread() will release the cq_tinfo before exit. Don't\r
637                    reference cq_tinfo after signaling  */\r
638                 h_cq_thread = cq_tinfo->ib_cq_thread;\r
639                 cq_tinfo->ib_cq_thread = NULL;\r
640 \r
641                 cq_tinfo->ib_cq_thread_exit_wanted = TRUE;\r
642                 cl_waitobj_signal( cq_tinfo->cq_waitobj );\r
643 \r
644                 /* Wait for ib_cq_thread to die, if we are not running on it */\r
645                 if( GetCurrentThreadId() != cq_tinfo->ib_cq_thread_id )\r
646                 {\r
647                         fzprint(("%s():%d:0x%x:0x%x: Waiting for ib_cq_thread=0x%x to die\n",\r
648                                          __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),\r
649                                          cq_tinfo->ib_cq_thread_id ));\r
650                         if( WaitForSingleObject( h_cq_thread, INFINITE ) != WAIT_OBJECT_0 )\r
651                         {\r
652                                 IBSP_ERROR( ("WaitForSingleObject failed\n") );\r
653                         }\r
654                         else\r
655                         {\r
656                                 STAT_DEC( thread_num );\r
657                         }\r
658                 }\r
659                 else\r
660                 {\r
661                         fzprint(("%s():%d:0x%x:0x%x: Currently on ib_cq_thread.\n", __FUNCTION__,\r
662                                          __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));\r
663                         STAT_DEC( thread_num );\r
664                 }\r
665                 CloseHandle( h_cq_thread );\r
666         }\r
667         else\r
668         {\r
669                 /* There was no thread created, destroy cq_waitobj and\r
670                    free memory */\r
671                 if( cq_tinfo->cq_waitobj )\r
672                 {\r
673                         cl_waitobj_destroy( cq_tinfo->cq_waitobj );\r
674                         cq_tinfo->cq_waitobj = NULL;\r
675                 }\r
676                 HeapFree( g_ibsp.heap, 0, cq_tinfo );\r
677         }\r
678 \r
679         IBSP_EXIT( IBSP_DBG_HW );\r
680 }\r
681 \r
682 \r
/* Attach one QP to a completion queue on the given HCA.
 *
 * Uses (or creates) the HCA's primary CQ and grows it by IB_CQ_SIZE
 * entries per attached QP.  If the CQ cannot grow any further
 * (IB_INVALID_CQ_SIZE), a brand-new CQ/thread pair is allocated and
 * becomes the primary.  Returns the CQ info the QP was counted
 * against, or NULL on failure.  Takes hca->cq_lock internally. */
static struct cq_thread_info *
ib_acquire_cq_tinfo(
					struct ibsp_hca				*hca )
{
	struct cq_thread_info	*cq_tinfo = NULL;
	uint32_t				cqe_size;
	ib_api_status_t			status;

	IBSP_ENTER( IBSP_DBG_HW );

	cl_spinlock_acquire( &hca->cq_lock );

	if( !hca->cq_tinfo )
	{
		cq_tinfo = ib_alloc_cq_tinfo( hca );
		if( !cq_tinfo )
		{
			IBSP_ERROR_EXIT( ("ib_alloc_cq_tinfo() failed\n") );
			cl_spinlock_release( &hca->cq_lock );
			return (NULL);
		}
	}
	else
	{
		cq_tinfo = hca->cq_tinfo;
	}

	CL_ASSERT( cq_tinfo != NULL );

	/* Required CQ depth: IB_CQ_SIZE entries for each QP, including
	 * the one being attached now. */
	cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;

	if( cq_tinfo->cqe_size < cqe_size )
	{
		status = ib_modify_cq( cq_tinfo->cq, &cqe_size );
		switch( status )
		{
		case IB_INVALID_CQ_SIZE:
			/* CQ cannot grow any further: start a fresh CQ/thread pair
			 * (ib_alloc_cq_tinfo makes it the new primary). */
			cq_tinfo = ib_alloc_cq_tinfo( hca );
			if( !cq_tinfo )
				break;

			cq_tinfo->qp_count++;
			break;

		case IB_SUCCESS:
			cq_tinfo->cqe_size = cqe_size;

			cq_tinfo->qp_count++;

			fzprint(("%s():%d:0x%x:0x%x: New cq size=%d.\n",
					 __FUNCTION__,
					 __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), cq_tinfo->cqe_size));
			break;

		default:
			IBSP_ERROR_EXIT(
				("ib_modify_cq() returned %s\n", ib_get_err_str(status)) );
			cq_tinfo = NULL;
		}
	}
	else
	{
		/* Current depth already suffices. */
		cq_tinfo->qp_count++;
	}

	cl_spinlock_release( &hca->cq_lock );
	IBSP_EXIT( IBSP_DBG_HW );
	return (cq_tinfo);
}
753 \r
754 void\r
755 ib_release_cq_tinfo(\r
756                                         struct cq_thread_info           *cq_tinfo )\r
757 {\r
758         IBSP_ENTER( IBSP_DBG_HW );\r
759 \r
760         CL_ASSERT( cq_tinfo );\r
761         CL_ASSERT( cq_tinfo->hca );\r
762 \r
763         cl_spinlock_acquire( &cq_tinfo->hca->cq_lock );\r
764         /* If this CQ now has fewer QPs than the primary, make it the primary. */\r
765         if( --cq_tinfo->qp_count < cq_tinfo->hca->cq_tinfo->qp_count )\r
766                 cq_tinfo->hca->cq_tinfo = cq_tinfo;\r
767         cl_spinlock_release( &cq_tinfo->hca->cq_lock );\r
768 \r
769         IBSP_EXIT( IBSP_DBG_HW );\r
770 }\r
771 \r
772 \r
773 /* Release IB ressources. */\r
774 void\r
775 ib_release(void)\r
776 {\r
777         cl_fmap_item_t                  *p_item;\r
778 \r
779         IBSP_ENTER( IBSP_DBG_HW );\r
780 \r
781         if( g_ibsp.al_handle )\r
782         {\r
783                 cl_list_item_t *item;\r
784                 ib_api_status_t status;\r
785 \r
786                 unregister_pnp();\r
787 \r
788                 while( (item = cl_qlist_head( &g_ibsp.hca_list )) != cl_qlist_end( &g_ibsp.hca_list ) )\r
789                 {\r
790                         struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);\r
791 \r
792                         pnp_ca_remove( hca );\r
793                 }\r
794 \r
795                 fzprint(("%s():%d:0x%x:0x%x: Calling ib_close_al...\n", __FUNCTION__,\r
796                                  __LINE__, GetCurrentProcessId(), GetCurrentThreadId()));\r
797 \r
798                 status = ib_close_al( g_ibsp.al_handle );\r
799 \r
800                 fzprint(("%s():%d:0x%x:0x%x: Done calling ib_close_al, status=%d.\n",\r
801                                  __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),\r
802                                  status));\r
803                 if( status != IB_SUCCESS )\r
804                 {\r
805                         IBSP_ERROR(\r
806                                 ("ib_close_al returned %s\n", ib_get_err_str( status )) );\r
807                 }\r
808                 else\r
809                 {\r
810                         IBSP_TRACE( IBSP_DBG_HW, ("ib_close_al success\n") );\r
811                         STAT_DEC( al_num );\r
812                 }\r
813                 g_ibsp.al_handle = NULL;\r
814         }\r
815 \r
816         for( p_item = cl_fmap_head( &g_ibsp.ip_map );\r
817                 p_item != cl_fmap_end( &g_ibsp.ip_map );\r
818                 p_item = cl_fmap_head( &g_ibsp.ip_map ) )\r
819         {\r
820                 cl_fmap_remove_item( &g_ibsp.ip_map, p_item );\r
821 \r
822                 HeapFree( g_ibsp.heap, 0,\r
823                         PARENT_STRUCT(p_item, struct ibsp_ip_addr, item) );\r
824         }\r
825 \r
826         IBSP_EXIT( IBSP_DBG_HW );\r
827 }\r
828 \r
829 \r
830 /* Initialize IB ressources. */\r
831 int\r
832 ibsp_initialize(void)\r
833 {\r
834         ib_api_status_t status;\r
835         int ret;\r
836 \r
837         IBSP_ENTER( IBSP_DBG_HW );\r
838 \r
839         CL_ASSERT( g_ibsp.al_handle == NULL );\r
840         CL_ASSERT( cl_qlist_count( &g_ibsp.hca_list ) == 0 );\r
841 \r
842         /* Open the IB library */\r
843         status = ib_open_al( &g_ibsp.al_handle );\r
844 \r
845         IBSP_TRACE( IBSP_DBG_HW, ("open is %d %p\n", status, g_ibsp.al_handle) );\r
846 \r
847         if( status != IB_SUCCESS )\r
848         {\r
849                 IBSP_ERROR( ("ib_open_al failed (%d)\n", status) );\r
850                 ret = WSAEPROVIDERFAILEDINIT;\r
851                 goto done;\r
852         }\r
853 \r
854         STAT_INC( al_num );\r
855 \r
856         /* Register for PNP events */\r
857         status = register_pnp();\r
858         if( status )\r
859         {\r
860                 IBSP_ERROR( ("register_pnp failed (%d)\n", status) );\r
861                 ret = WSAEPROVIDERFAILEDINIT;\r
862                 goto done;\r
863         }\r
864 \r
865         STAT_INC( thread_num );\r
866 \r
867         ret = 0;\r
868 done:\r
869         if( ret )\r
870         {\r
871                 /* Free up resources. */\r
872                 ib_release();\r
873         }\r
874 \r
875         IBSP_EXIT( IBSP_DBG_HW );\r
876 \r
877         return ret;\r
878 }\r
879 \r
880 \r
881 /* Destroys the infiniband ressources of a socket. */\r
882 void\r
883 ib_destroy_socket(\r
884         IN      OUT                     struct ibsp_socket_info         *socket_info )\r
885 {\r
886         ib_api_status_t status;\r
887 \r
888         IBSP_ENTER( IBSP_DBG_EP );\r
889 \r
890         if( socket_info->qp )\r
891         {\r
892                 cl_atomic_inc( &socket_info->ref_cnt );\r
893                 status = ib_destroy_qp( socket_info->qp, deref_socket_info );\r
894                 if( status != IB_SUCCESS )\r
895                 {\r
896                         IBSP_ERROR( ("ib_destroy_qp returned %s\n",\r
897                                 ib_get_err_str( status )) );\r
898                         deref_socket_info( socket_info );\r
899                 }\r
900 \r
901                 ib_release_cq_tinfo( socket_info->cq_tinfo );\r
902 \r
903                 socket_info->qp = NULL;\r
904         }\r
905 \r
906         IBSP_EXIT( IBSP_DBG_EP );\r
907 }\r
908 \r
909 \r
910 /*\r
911  * Creates the necessary IB ressources for a socket\r
912  */\r
913 int\r
914 ib_create_socket(\r
915         IN      OUT                     struct ibsp_socket_info         *socket_info)\r
916 {\r
917         ib_qp_create_t                  qp_create;\r
918         ib_api_status_t                 status;\r
919         ib_qp_attr_t                    qp_attr;\r
920 \r
921         IBSP_ENTER( IBSP_DBG_EP );\r
922 \r
923         CL_ASSERT( socket_info != NULL );\r
924         CL_ASSERT( socket_info->port != NULL );\r
925         CL_ASSERT( socket_info->qp == NULL );\r
926 \r
927         socket_info->hca_pd = socket_info->port->hca->pd;\r
928 \r
929         /* Get the completion queue and thread info for this socket */\r
930         socket_info->cq_tinfo = ib_acquire_cq_tinfo( socket_info->port->hca );\r
931         if( !socket_info->cq_tinfo )\r
932         {\r
933                 IBSP_ERROR_EXIT( ("ib_acquire_cq_tinfo failed\n") );\r
934                 return WSAENOBUFS;\r
935         }\r
936 \r
937         /* Queue pair */\r
938         qp_create.qp_type = IB_QPT_RELIABLE_CONN;\r
939         qp_create.sq_depth = QP_ATTRIB_SQ_DEPTH;\r
940         qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;\r
941         qp_create.sq_sge = QP_ATTRIB_SQ_SGE;\r
942         qp_create.rq_sge = 1;\r
943         qp_create.h_rq_cq = socket_info->cq_tinfo->cq;\r
944         qp_create.h_sq_cq = socket_info->cq_tinfo->cq;\r
945         qp_create.sq_signaled = TRUE;\r
946 \r
947         status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,    /* context */\r
948                 NULL,   /* async handler */\r
949                 &socket_info->qp );\r
950         if( status )\r
951         {\r
952                 ib_release_cq_tinfo( socket_info->cq_tinfo );\r
953                 IBSP_ERROR_EXIT(\r
954                         ("ib_create_qp returned %s\n", ib_get_err_str( status )) );\r
955                 return WSAENOBUFS;\r
956         }\r
957 \r
958         status = ib_query_qp( socket_info->qp, &qp_attr );\r
959         if( status == IB_SUCCESS )\r
960         {\r
961                 socket_info->max_inline = min( g_max_inline, qp_attr.sq_max_inline );\r
962         }\r
963         else\r
964         {\r
965                 IBSP_ERROR( ("ib_query_qp returned %s\n", ib_get_err_str( status )) );\r
966                 socket_info->max_inline = 0;\r
967         }\r
968 \r
969         STAT_INC( qp_num );\r
970 \r
971         IBSP_EXIT( IBSP_DBG_EP );\r
972         return 0;\r
973 }\r
974 \r
975 \r
976 void\r
977 wait_cq_drain(\r
978         IN      OUT                     struct ibsp_socket_info         *socket_info )\r
979 {\r
980         IBSP_ENTER( IBSP_DBG_EP );\r
981 \r
982         if( socket_info->cq_tinfo == NULL )\r
983         {\r
984                 IBSP_EXIT( IBSP_DBG_EP );\r
985                 return;\r
986         }\r
987 \r
988         /* Wait for the QP to be drained. */\r
989         while( socket_info->send_cnt || socket_info->recv_cnt )\r
990         {\r
991                 fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr_list_count=%d qp state=%d\n",\r
992                                  __FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(),\r
993                                  socket_info, cl_qlist_count(&socket_info->wr_list)));\r
994 \r
995                 Sleep(100);\r
996         }\r
997 \r
998         IBSP_EXIT( IBSP_DBG_EP );\r
999 }\r
1000 \r
1001 \r
/*
 * Aborts every overlapped request still queued in the duplicate-socket
 * shadow ring (dup_wr), completing each with WSAECONNABORTED.  Must only
 * be called once all real send/recv work requests have drained.
 */
void
ibsp_dup_overlap_abort(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	LPWSAOVERLAPPED lpOverlapped = NULL;
	int error;
	int ret;
	uint8_t				idx;

	IBSP_ENTER( IBSP_DBG_EP );
	CL_ASSERT( !socket_info->send_cnt && !socket_info->recv_cnt );

	/* Browse the list of all posted overlapped structures
	 * to mark them as aborted. */
	/* Oldest outstanding slot.  The uint8_t subtraction wraps mod 256; the
	 * re-bias below folds a wrapped value back into the ring. */
	idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;
	if( idx >= QP_ATTRIB_RQ_DEPTH )
		idx += QP_ATTRIB_RQ_DEPTH;

	/* NOTE(review): idx is never advanced inside this loop, so every
	 * iteration reads dup_wr[idx] — it looks like it should step through
	 * the ring once per completed entry; confirm against the dup_wr
	 * producer before changing. */
	while( socket_info->dup_cnt )
	{
		lpOverlapped = socket_info->dup_wr[idx].wr.lpOverlapped;

		fzprint(("%s():%d:0x%x:0x%x: socket=0x%p wr=0x%p overlapped=0x%p Internal=%d InternalHigh=%d hEvent=%d\n",
			__FUNCTION__, __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), socket_info, &socket_info->dup_wr[idx], lpOverlapped, lpOverlapped->Internal, lpOverlapped->InternalHigh, lpOverlapped->hEvent));

		/* OffsetHigh carries the WSA error; InternalHigh the byte count. */
		lpOverlapped->OffsetHigh = WSAECONNABORTED;
		lpOverlapped->InternalHigh = 0;

		if( ((uintptr_t) lpOverlapped->hEvent) & 0x00000001 )
		{
			/* Indicate this operation is complete. The switch will poll
			 * with calls to WSPGetOverlappedResult(). */
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h1_comp_count);

			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif

			CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
					 ("%s: set internal overlapped=0x%p Internal=%d OffsetHigh=%d\n",
					  __FUNCTION__, lpOverlapped, lpOverlapped->Internal,
					  lpOverlapped->OffsetHigh));

			/* Internal = 0 marks the request as no longer pending. */
			lpOverlapped->Internal = 0;
		}
		else
		{
			/* hEvent low bit clear: the switch expects an upcall-based
			 * completion via lpWPUCompleteOverlappedRequest. */
#ifdef _DEBUG_
			cl_atomic_dec(&g_ibsp.overlap_h0_count);


			fzprint(("%s():%d:0x%x:0x%x: ov=0x%p h0=%d h1=%d h1_c=%d send=%d recv=%d\n",
					 __FUNCTION__, __LINE__, GetCurrentProcessId(),
					 GetCurrentThreadId(), lpOverlapped,
					 g_ibsp.overlap_h0_count, g_ibsp.overlap_h1_count,
					 g_ibsp.overlap_h1_comp_count, g_ibsp.send_count, g_ibsp.recv_count));
#endif
			CL_TRACE(IBSP_DBG_WQ, gdbg_lvl,
					 ("%s: calls lpWPUCompleteOverlappedRequest, overlapped=0x%p OffsetHigh=%d InternalHigh=%d hEvent=%d\n",
					  __FUNCTION__, lpOverlapped, lpOverlapped->OffsetHigh,
					  lpOverlapped->InternalHigh, lpOverlapped->hEvent));

			ret = g_ibsp.up_call_table.lpWPUCompleteOverlappedRequest
				(socket_info->switch_socket,
				 lpOverlapped,
				 lpOverlapped->OffsetHigh, (DWORD) lpOverlapped->InternalHigh, &error);

			if( ret != 0 )
			{
				CL_ERROR(IBSP_DBG_EP, gdbg_lvl,
						 ("lpWPUCompleteOverlappedRequest failed with %d/%d\n", ret,
						  error));
			}
		}
		cl_atomic_dec( &socket_info->dup_cnt );
	}

	IBSP_EXIT( IBSP_DBG_EP );
}
1085 \r
1086 \r
/*
 * Closes a connection and releases its resources: marks the socket
 * CLOSED, cancels any listen, removes it from the global lists/maps,
 * flushes outstanding completions, and tears down its IB resources.
 */
void
shutdown_and_destroy_socket_info(
	IN	OUT			struct ibsp_socket_info		*socket_info )
{
	enum ibsp_socket_state	old_state;

	IBSP_ENTER( IBSP_DBG_EP );

	/* Capture the prior state under the socket mutex; the cleanup that
	 * follows depends on which state the socket was in. */
	cl_spinlock_acquire( &socket_info->mutex );
	old_state = socket_info->socket_state;
	IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CLOSED );
	cl_spinlock_release( &socket_info->mutex );

	if( socket_info->listen.handle )
	{
		/* Stop listening and reject queued connections. */
		ib_listen_cancel( socket_info );
	}

	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	cl_qlist_remove_item( &g_ibsp.socket_info_list, &socket_info->item );

	/* No default: states not listed (e.g. already CLOSED) need no map
	 * cleanup here — NOTE(review): confirm against enum ibsp_socket_state. */
	switch( old_state )
	{
	case IBSP_CREATE:
	case IBSP_LISTEN:
		/* Nothing to do. */
		break;

	case IBSP_CONNECTED:
		{
			struct disconnect_reason reason;

			memset( &reason, 0, sizeof(reason) );
			reason.type = DISC_SHUTDOWN;
			ib_disconnect( socket_info, &reason );
		}
		/* Fall through. */

	case IBSP_CONNECT:
	case IBSP_DISCONNECTED:
		/* We changed the state - remove from connection map. */
		CL_ASSERT( socket_info->conn_item.p_map );
		cl_rbmap_remove_item( &g_ibsp.conn_map, &socket_info->conn_item );
		break;
	}
	cl_spinlock_release( &g_ibsp.socket_info_mutex );

	/* Flush all completions. */
	if( socket_info->dup_cnt )
		ibsp_dup_overlap_abort( socket_info );

	/* Poll the CQ directly until all send/recv work requests complete. */
	while( socket_info->send_cnt || socket_info->recv_cnt )
		ib_cq_comp( socket_info->cq_tinfo );

	ibsp_dereg_socket( socket_info );

	ib_destroy_socket( socket_info );

#ifdef IBSP_LOGGING
	DataLogger_Shutdown(&socket_info->SendDataLogger);
	DataLogger_Shutdown(&socket_info->RecvDataLogger);
#endif

	/* Release the initial reference and clean up. */
	deref_socket_info( socket_info );

	IBSP_EXIT( IBSP_DBG_EP );
}
1157 \r
1158 \r
1159 boolean_t\r
1160 ibsp_conn_insert(\r
1161         IN                              struct ibsp_socket_info         *s )\r
1162 {\r
1163         struct ibsp_socket_info         *p_sock;\r
1164         cl_rbmap_item_t                         *p_item, *p_insert_at;\r
1165         boolean_t                                       left = TRUE;\r
1166 \r
1167         p_item = cl_rbmap_root( &g_ibsp.conn_map );\r
1168         p_insert_at = p_item;\r
1169 \r
1170         cl_spinlock_acquire( &g_ibsp.socket_info_mutex );\r
1171         CL_ASSERT( !s->conn_item.p_map );\r
1172         while( p_item != cl_rbmap_end( &g_ibsp.conn_map ) )\r
1173         {\r
1174                 p_insert_at = p_item;\r
1175                 p_sock = PARENT_STRUCT( p_item, struct ibsp_socket_info, conn_item );\r
1176                 if( p_sock->local_addr.sin_family < s->local_addr.sin_family )\r
1177                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1178                 else if( p_sock->local_addr.sin_family > s->local_addr.sin_family )\r
1179                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1180                 else if( p_sock->local_addr.sin_addr.S_un.S_addr < s->local_addr.sin_addr.S_un.S_addr )\r
1181                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1182                 else if( p_sock->local_addr.sin_addr.S_un.S_addr > s->local_addr.sin_addr.S_un.S_addr )\r
1183                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1184                 else if( p_sock->local_addr.sin_port < s->local_addr.sin_port )\r
1185                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1186                 else if( p_sock->local_addr.sin_port > s->local_addr.sin_port )\r
1187                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1188                 else if( p_sock->peer_addr.sin_family < s->peer_addr.sin_family )\r
1189                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1190                 else if( p_sock->peer_addr.sin_family > s->peer_addr.sin_family )\r
1191                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1192                 else if( p_sock->peer_addr.sin_addr.S_un.S_addr < s->peer_addr.sin_addr.S_un.S_addr )\r
1193                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1194                 else if( p_sock->peer_addr.sin_addr.S_un.S_addr > s->peer_addr.sin_addr.S_un.S_addr )\r
1195                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1196                 else if( p_sock->peer_addr.sin_port < s->peer_addr.sin_port )\r
1197                         p_item = cl_rbmap_left( p_item ), left = TRUE;\r
1198                 else if( p_sock->peer_addr.sin_port > s->peer_addr.sin_port )\r
1199                         p_item = cl_rbmap_right( p_item ), left = FALSE;\r
1200                 else\r
1201                         goto done;\r
1202         }\r
1203 \r
1204         cl_rbmap_insert( &g_ibsp.conn_map, p_insert_at, &s->conn_item, left );\r
1205 \r
1206 done:\r
1207         cl_spinlock_release( &g_ibsp.socket_info_mutex );\r
1208         return p_item == cl_rbmap_end( &g_ibsp.conn_map );\r
1209 }\r
1210 \r
1211 \r
/*
 * Removes a socket from the global connection map.  The socket must
 * currently be in the map (asserted via conn_item.p_map).
 */
void
ibsp_conn_remove(
	IN				struct ibsp_socket_info		*s )
{
	cl_spinlock_acquire( &g_ibsp.socket_info_mutex );
	CL_ASSERT( s->conn_item.p_map );
	cl_rbmap_remove_item( &g_ibsp.conn_map, &s->conn_item );
	cl_spinlock_release( &g_ibsp.socket_info_mutex );
}