[WSD] Rename some structures to make the code more readable.
[mirror/winof/.git] / ulp / wsd / user / ib_cm.c
1 /*\r
2  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.\r
3  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.\r
4  *\r
5  * This software is available to you under the OpenIB.org BSD license\r
6  * below:\r
7  *\r
8  *     Redistribution and use in source and binary forms, with or\r
9  *     without modification, are permitted provided that the following\r
10  *     conditions are met:\r
11  *\r
12  *      - Redistributions of source code must retain the above\r
13  *        copyright notice, this list of conditions and the following\r
14  *        disclaimer.\r
15  *\r
16  *      - Redistributions in binary form must reproduce the above\r
17  *        copyright notice, this list of conditions and the following\r
18  *        disclaimer in the documentation and/or other materials\r
19  *        provided with the distribution.\r
20  *\r
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
24  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
25  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
26  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
27  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
28  * SOFTWARE.\r
29  *\r
30  * $Id$\r
31  */\r
32 #include "ibspdebug.h"\r
33 #if defined(EVENT_TRACING)\r
34 #include "ib_cm.tmh"\r
35 #endif\r
36 \r
37 #include "ibspdll.h"\r
38 \r
39 static void AL_API cm_req_callback(IN ib_cm_req_rec_t * p_cm_req_rec);\r
40 static void AL_API cm_rep_callback(IN ib_cm_rep_rec_t * p_cm_rep_rec);\r
41 static void AL_API cm_rtu_callback(IN ib_cm_rtu_rec_t * p_cm_rtu_rec);\r
42 static void AL_API cm_rej_callback(IN ib_cm_rej_rec_t * p_cm_rej_rec);\r
43 static void AL_API cm_mra_callback(IN ib_cm_mra_rec_t * p_cm_mra_rec);\r
44 static void AL_API cm_dreq_callback(IN ib_cm_dreq_rec_t * p_cm_dreq_rec);\r
45 static void AL_API listen_err_callback(IN ib_listen_err_rec_t * p_listen_err_rec);\r
46 static void AL_API cm_apr_callback(IN ib_cm_apr_rec_t * p_cm_apr_rec);\r
47 \r
48 \r
49 /* Computes a service ID for a port. */\r
50 static inline ib_net64_t\r
51 get_service_id_for_port(\r
52                                         ib_net16_t                                      ip_port)\r
53 {\r
54         return BASE_LISTEN_ID | ip_port;\r
55 }\r
56 \r
57 \r
58 /* Signals a select event to the switch. */\r
59 void\r
60 ibsp_post_select_event(\r
61                                         struct ibsp_socket_info         *socket_info,\r
62                                         int                                                     event,\r
63                                         int                                                     error )\r
64 {\r
65         HANDLE          h_event;\r
66 \r
67         IBSP_ENTER( IBSP_DBG_NEV );\r
68 \r
69         CL_ASSERT( socket_info );\r
70         CL_ASSERT( event );\r
71 \r
72         switch( event )\r
73         {\r
74         case FD_CONNECT:\r
75                 IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_NEV,\r
76                         ("socket %p FD_CONNECT\n", socket_info) );\r
77                 socket_info->errno_connect = error;\r
78                 break;\r
79 \r
80         case FD_ACCEPT:\r
81                 IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_NEV,\r
82                         ("socket %p FD_ACCEPT\n", socket_info) );\r
83                 break;\r
84 \r
85         default:\r
86                 CL_ASSERT( 0 );\r
87                 break;\r
88         }\r
89 \r
90         _InterlockedOr( &socket_info->network_events, event );\r
91 \r
92         h_event = InterlockedCompareExchangePointer(\r
93                 &socket_info->event_select, NULL, NULL );\r
94         /* Check for event notification request and signal as needed. */\r
95         if( (socket_info->event_mask & event) && h_event )\r
96         {\r
97                 IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_NEV,\r
98                         ("Signaling eventHandle %p at time %I64d.\n",\r
99                         h_event, cl_get_time_stamp() ) );\r
100                 SetEvent( h_event );\r
101         }\r
102 \r
103         IBSP_EXIT( IBSP_DBG_NEV );\r
104 }\r
105 \r
106 \r
107 /*\r
108  * A user-specified callback that is invoked after receiving a connection\r
109  * request message (REQ).\r
110  */\r
111 static void AL_API\r
112 cm_req_callback(\r
113         IN                              ib_cm_req_rec_t                         *p_cm_req_rec )\r
114 {\r
115         struct ibsp_socket_info *socket_info =\r
116                 (struct ibsp_socket_info * __ptr64)p_cm_req_rec->context;\r
117         struct listen_incoming *incoming;\r
118 \r
119         IBSP_ENTER( IBSP_DBG_CM );\r
120 \r
121         CL_ASSERT( socket_info );\r
122         CL_ASSERT( p_cm_req_rec->p_req_pdata );\r
123 \r
124         cl_spinlock_acquire( &socket_info->mutex1 );\r
125 \r
126         switch( socket_info->socket_state )\r
127         {\r
128         case IBSP_LISTEN:\r
129                 if( cl_qlist_count( &socket_info->listen.list ) >=\r
130                         socket_info->listen.backlog )\r
131                 {\r
132                         /* Already too many connection requests are queued */\r
133                         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM,\r
134                                 ("already too many incoming connections, rejecting\n") );\r
135                         ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_USER_DEFINED );\r
136                         break;\r
137                 }\r
138 \r
139                 incoming = HeapAlloc( g_ibsp.heap, 0, sizeof(struct listen_incoming) );\r
140                 if( !incoming )\r
141                 {\r
142                         /* Low on memory. */\r
143                         IBSP_ERROR( ("HeapAlloc failed, rejecting\n") );\r
144                         ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_INSUF_RESOURCES );\r
145                         IBSP_EXIT( IBSP_DBG_CM );\r
146                         return;\r
147                 }\r
148 \r
149                 incoming->cm_req_received = *p_cm_req_rec;\r
150                 cl_memcpy( &incoming->params, p_cm_req_rec->p_req_pdata,\r
151                         sizeof(struct cm_req_params) );\r
152                 incoming->cm_req_received.p_req_pdata = (const uint8_t*)&incoming->params;\r
153 \r
154                 /* Add to the waiting list */\r
155                 cl_qlist_insert_tail( &socket_info->listen.list, &incoming->item );\r
156 \r
157                 ibsp_post_select_event( socket_info, FD_ACCEPT, 0 );\r
158                 break;\r
159 \r
160         case IBSP_DUPLICATING_REMOTE:\r
161                 {\r
162                         int ret;\r
163 \r
164                         /* Non-blocking cancel since we're in CM callback context */\r
165                         ib_cm_cancel( socket_info->listen.handle, NULL );\r
166                         socket_info->listen.handle = NULL;\r
167                         cl_spinlock_release( &socket_info->mutex1 );\r
168 \r
169                         wait_cq_drain( socket_info );\r
170 \r
171                         cl_spinlock_acquire( &socket_info->mutex1 );\r
172                         ret = ib_accept( socket_info, p_cm_req_rec );\r
173                         if( ret )\r
174                         {\r
175                                 cl_spinlock_release( &socket_info->mutex1 );\r
176                                 IBSP_ERROR( (\r
177                                         "ib_accept for duplicate socket returned %d, rejecting\n",\r
178                                         ret) );\r
179                                 /* Call ib_destroy_socket for above ib_create_socket() call */\r
180                                 ib_destroy_socket( socket_info );\r
181                                 ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_USER_DEFINED );\r
182                                 ibsp_dup_overlap_abort( socket_info );\r
183                                 IBSP_EXIT( IBSP_DBG_CM );\r
184                                 return;\r
185                         }\r
186                 }\r
187                 break;\r
188 \r
189         default:\r
190                 IBSP_ERROR( ("socket is not listening anymore\n") );\r
191                 /* We're closing down - let some other listen match. */\r
192                 ib_reject( p_cm_req_rec->h_cm_req, IB_REJ_INVALID_SID );\r
193                 break;\r
194         }\r
195 \r
196         cl_spinlock_release( &socket_info->mutex1 );\r
197 \r
198         IBSP_EXIT( IBSP_DBG_CM );\r
199 }\r
200 \r
201 \r
202 /*\r
203  * A user-specified callback that is invoked after receiving a connection\r
204  * request reply message (REP).\r
205  */\r
206 static void AL_API\r
207 cm_rep_callback(\r
208         IN                              ib_cm_rep_rec_t                         *p_cm_rep_rec )\r
209 {\r
210         struct ibsp_socket_info *socket_info =\r
211                 (struct ibsp_socket_info * __ptr64)p_cm_rep_rec->qp_context;\r
212         ib_cm_rtu_t cm_rtu;\r
213         ib_api_status_t status;\r
214 \r
215         IBSP_ENTER( IBSP_DBG_CM );\r
216 \r
217         memset( &cm_rtu, 0, sizeof(cm_rtu) );\r
218 \r
219         cm_rtu.access_ctrl = IB_AC_RDMA_READ | IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;\r
220 #if 0\r
221         // Bug in TAVOR\r
222         cm_rtu.sq_depth = QP_ATTRIB_SQ_DEPTH;\r
223         cm_rtu.rq_depth = QP_ATTRIB_RQ_DEPTH;\r
224 #endif\r
225         cm_rtu.pfn_cm_apr_cb = cm_apr_callback;\r
226         cm_rtu.pfn_cm_dreq_cb = cm_dreq_callback;\r
227 \r
228         cl_spinlock_acquire( &socket_info->mutex1 );\r
229 \r
230         switch( socket_info->socket_state )\r
231         {\r
232         case IBSP_CONNECT:\r
233                 status = ib_cm_rtu( p_cm_rep_rec->h_cm_rep, &cm_rtu );\r
234                 if( status != IB_SUCCESS )\r
235                 {\r
236                         /* Note: a REJ has been automatically sent. */\r
237                         IBSP_ERROR( ("ib_cm_rtu returned %s\n", ib_get_err_str( status )) );\r
238                         IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_BIND );\r
239 \r
240                         /* We changed the state - remove from connection map. */\r
241                         ibsp_conn_remove( socket_info );\r
242 \r
243                         ibsp_post_select_event( socket_info, FD_CONNECT, WSAETIMEDOUT );\r
244                 }\r
245                 else\r
246                 {\r
247                         IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CONNECTED );\r
248                         ibsp_post_select_event( socket_info, FD_CONNECT, 0 );\r
249                 }\r
250                 break;\r
251 \r
252         case IBSP_DUPLICATING_NEW:\r
253                 status = ib_cm_rtu( p_cm_rep_rec->h_cm_rep, &cm_rtu );\r
254                 if( status != IB_SUCCESS )\r
255                 {\r
256                         IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_BIND );\r
257 \r
258                         /* We changed the state - remove from connection map. */\r
259                         ibsp_conn_remove( socket_info );\r
260 \r
261                         /* Note: a REJ has been automatically sent. */\r
262                         IBSP_ERROR( ("ib_cm_rtu returned %s\n", ib_get_err_str( status )) );\r
263                 }\r
264                 else\r
265                 {\r
266                         IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CONNECTED );\r
267                 }\r
268                 SetEvent( socket_info->h_event );\r
269                 break;\r
270 \r
271         default:\r
272                 /* The socket might be closing */\r
273                 IBSP_ERROR( ("socket %p not in connecting state (%s)\n",\r
274                         socket_info, IBSP_SOCKET_STATE_STR( socket_info->socket_state )) );\r
275 \r
276                 ib_reject( p_cm_rep_rec->h_cm_rep, IB_REJ_USER_DEFINED );\r
277         }\r
278 \r
279         cl_spinlock_release( &socket_info->mutex1 );\r
280 \r
281         IBSP_EXIT( IBSP_DBG_CM );\r
282 }\r
283 \r
284 \r
285 /*\r
286  * A user-specified callback that is invoked after receiving a connection\r
287  * ready to use message (RTU).\r
288  */\r
289 static void AL_API\r
290 cm_rtu_callback(\r
291         IN                              ib_cm_rtu_rec_t                         *p_cm_rtu_rec )\r
292 {\r
293         struct ibsp_socket_info *socket_info =\r
294                 (struct ibsp_socket_info * __ptr64)p_cm_rtu_rec->qp_context;\r
295 \r
296         IBSP_ENTER( IBSP_DBG_CM );\r
297 \r
298         cl_spinlock_acquire( &socket_info->mutex1 );\r
299 \r
300         if( socket_info->socket_state == IBSP_DUPLICATING_REMOTE )\r
301         {\r
302                 struct _recv_wr         *wr;\r
303                 ib_api_status_t         status;\r
304                 uint8_t                         idx;\r
305 \r
306                 /* Repost all the WR to the new QP */\r
307                 cl_spinlock_acquire( &socket_info->recv_lock );\r
308 \r
309                 while( socket_info->dup_cnt )\r
310                 {\r
311                         if( (socket_info->recv_cnt + socket_info->dup_cnt) >\r
312                                 QP_ATTRIB_RQ_DEPTH )\r
313                         {\r
314                                 CL_ASSERT( (socket_info->recv_cnt + socket_info->dup_cnt) <=\r
315                                         QP_ATTRIB_RQ_DEPTH );\r
316                                 /* TODO: Flag the socket as having failed. */\r
317                                 break;\r
318                         }\r
319 \r
320 \r
321                         /* Figure out the starting index in the duplicate array. */\r
322                         idx = socket_info->dup_idx - (uint8_t)socket_info->dup_cnt;\r
323                         if( idx >= QP_ATTRIB_RQ_DEPTH )\r
324                         {\r
325                                 /* The duplicates wrap over the end of the array. */\r
326                                 idx += QP_ATTRIB_RQ_DEPTH;\r
327                         }\r
328 \r
329                         /*\r
330                          * Copy the duplicate work request from the duplicate array\r
331                          * to the receive array.\r
332                          */\r
333                         socket_info->recv_wr[socket_info->recv_idx] =\r
334                                 socket_info->dup_wr[idx];\r
335 \r
336                         wr = &socket_info->recv_wr[socket_info->recv_idx];\r
337 \r
338                         /* Update the work request ID. */\r
339                         wr->recv.wr_id = (uint64_t)(void* __ptr64)wr;\r
340 \r
341                         /*\r
342                          * Increment the count before posting so it doesn't go\r
343                          * negative in the completion path.\r
344                          */\r
345                         cl_atomic_inc( &socket_info->recv_cnt );\r
346 \r
347                         status = ib_post_recv( socket_info->qp, &wr->recv, NULL );\r
348 \r
349                         if( status == IB_SUCCESS )\r
350                         {\r
351                                 /* Update the index and wrap as needed */\r
352 #if QP_ATTRIB_RQ_DEPTH == 256 || QP_ATTRIB_RQ_DEPTH == 128 || \\r
353         QP_ATTRIB_RQ_DEPTH == 64 || QP_ATTRIB_RQ_DEPTH == 32 || \\r
354         QP_ATTRIB_RQ_DEPTH == 16 || QP_ATTRIB_RQ_DEPTH == 8\r
355                                 socket_info->recv_idx++;\r
356                                 socket_info->recv_idx &= (QP_ATTRIB_RQ_DEPTH - 1);\r
357 #else\r
358                                 if( ++socket_info->recv_idx == QP_ATTRIB_RQ_DEPTH )\r
359                                         socket_info->recv_idx = 0;\r
360 #endif\r
361 \r
362                                 cl_atomic_dec( &socket_info->dup_cnt );\r
363                         }\r
364                         else\r
365                         {\r
366                                 IBSP_ERROR( (\r
367                                         "ib_post_recv returned %s for reposted buffer\n",\r
368                                         ib_get_err_str( status )) );\r
369 \r
370                                 cl_atomic_dec( &socket_info->recv_cnt );\r
371                                 CL_ASSERT( status == IB_SUCCESS );\r
372                                 /* TODO: Flag the socket as having failed. */\r
373                                 break;\r
374                         }\r
375                 }\r
376 \r
377                 cl_spinlock_release( &socket_info->recv_lock );\r
378 \r
379                 socket_info->qp_error = 0;\r
380                 IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_CONNECTED );\r
381         }\r
382         else if( socket_info->socket_state != IBSP_CONNECTED )\r
383         {\r
384                 /* The Socket might be closing */\r
385                 IBSP_ERROR( ("Got RTU while in socket_state %s - ignoring\n",\r
386                         IBSP_SOCKET_STATE_STR( socket_info->socket_state )) );\r
387         }\r
388 \r
389         cl_spinlock_release( &socket_info->mutex1 );\r
390 \r
391         IBSP_EXIT( IBSP_DBG_CM );\r
392 }\r
393 \r
394 \r
395 /* Force the QP to error state to flush posted work requests. */\r
396 static inline void\r
397 __flush_qp(\r
398         IN                              struct ibsp_socket_info         *p_socket )\r
399 {\r
400         ib_qp_mod_t                     qp_mod;\r
401         ib_api_status_t         status;\r
402 \r
403         memset( &qp_mod, 0, sizeof(qp_mod) );\r
404         qp_mod.req_state = IB_QPS_ERROR;\r
405         status = ib_modify_qp( p_socket->qp, &qp_mod );\r
406         if( status != IB_SUCCESS )\r
407         {\r
408                 IBSP_ERROR( ("ib_modify_qp returned %s\n", ib_get_err_str( status )) );\r
409                 p_socket->send_cnt = 0;\r
410                 p_socket->recv_cnt = 0;\r
411         }\r
412 }\r
413 \r
414 \r
415 /*\r
416  * A user-specified callback that is invoked after receiving a connection\r
417  * rejection message (REJ).\r
418  */\r
419 static void AL_API\r
420 cm_rej_callback(\r
421         IN                              ib_cm_rej_rec_t                         *p_cm_rej_rec )\r
422 {\r
423         struct ibsp_socket_info *socket_info =\r
424                 (struct ibsp_socket_info * __ptr64)p_cm_rej_rec->qp_context;\r
425 \r
426         IBSP_ENTER( IBSP_DBG_CM );\r
427 \r
428         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM, ("socket %p connect reject, reason=%d\n",\r
429                 socket_info, cl_ntoh16(p_cm_rej_rec->rej_status)) );\r
430 \r
431         cl_spinlock_acquire( &socket_info->mutex1 );\r
432 \r
433         switch( socket_info->socket_state )\r
434         {\r
435         case IBSP_CONNECT:\r
436                 /* Remove from connection map. */\r
437                 ibsp_conn_remove( socket_info );\r
438 \r
439                 IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_BIND );\r
440                 if( p_cm_rej_rec->rej_status == IB_REJ_TIMEOUT )\r
441                         ibsp_post_select_event( socket_info, FD_CONNECT, WSAETIMEDOUT );\r
442                 else\r
443                         ibsp_post_select_event( socket_info, FD_CONNECT, WSAECONNREFUSED );\r
444                 break;\r
445 \r
446         case IBSP_CONNECTED:\r
447                 /*\r
448                  * DISCONNECTED is a terminal state.  We'll remove the connection\r
449                  * when the socket gets destroyed.\r
450                  */\r
451                 IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_DISCONNECTED );\r
452 \r
453                 socket_info->qp_error = WSAECONNABORTED;\r
454 \r
455                 __flush_qp( socket_info );\r
456                 break;\r
457 \r
458         case IBSP_DUPLICATING_NEW:\r
459                 /* Leave in that state. IBSPSocket will eventually return \r
460                  * an error becaus the socket is not connected. */\r
461                 ibsp_conn_remove( socket_info );\r
462                 SetEvent( socket_info->h_event );\r
463                 break;\r
464 \r
465         default:\r
466                 IBSP_ERROR( ("socket %p got an REJ reason %d in state %s\n",\r
467                         socket_info, cl_ntoh16( p_cm_rej_rec->rej_status ),\r
468                         IBSP_SOCKET_STATE_STR(socket_info->socket_state)) );\r
469                 break;\r
470         }\r
471 \r
472         cl_spinlock_release( &socket_info->mutex1 );\r
473 \r
474         IBSP_EXIT( IBSP_DBG_CM );\r
475 }\r
476 \r
477 \r
478 /*\r
479  * A user-specified callback that is invoked after receiving a message\r
480  * received acknowledgement.\r
481  */\r
482 static void AL_API\r
483 cm_mra_callback(\r
484         IN                              ib_cm_mra_rec_t                         *p_cm_mra_rec )\r
485 {\r
486         /* TODO */\r
487         IBSP_ENTER( IBSP_DBG_CM );\r
488 \r
489         UNUSED_PARAM( p_cm_mra_rec );\r
490 \r
491         IBSP_EXIT( IBSP_DBG_CM );\r
492 }\r
493 \r
494 \r
495 /*\r
496  * A user-specified callback that is invoked after receiving a disconnect\r
497  * request message (DREQ).\r
498  */\r
499 static void AL_API\r
500 cm_dreq_callback(\r
501         IN                              ib_cm_dreq_rec_t                        *p_cm_dreq_rec )\r
502 {\r
503         ib_api_status_t status;\r
504         ib_cm_drep_t cm_drep;\r
505         struct disconnect_reason *reason;\r
506         struct ibsp_socket_info *socket_info =\r
507                 (struct ibsp_socket_info * __ptr64)p_cm_dreq_rec->qp_context;\r
508 \r
509         IBSP_ENTER( IBSP_DBG_CM );\r
510         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM,\r
511                 ("socket=%p state=%s\n",\r
512                 socket_info, IBSP_SOCKET_STATE_STR( socket_info->socket_state )) );\r
513 \r
514         reason = (struct disconnect_reason * __ptr64)p_cm_dreq_rec->p_dreq_pdata;\r
515 \r
516         cl_spinlock_acquire( &socket_info->mutex1 );\r
517 \r
518         if( socket_info->socket_state == IBSP_CONNECTED )\r
519         {\r
520                 switch( reason->type )\r
521                 {\r
522                 case DISC_DUPLICATING:\r
523                         {\r
524                                 int ret;\r
525 \r
526                                 IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_DUPLICATING_REMOTE );\r
527                                 socket_info->qp_error = -1;\r
528                                 socket_info->duplicate.identifier = reason->duplicating.identifier;\r
529                                 socket_info->duplicate.dwProcessId = reason->duplicating.dwProcessId;\r
530 \r
531                                 /* Now, setup our listening callback. */\r
532                                 socket_info->listen.listen_req_param.dwProcessId =\r
533                                         reason->duplicating.dwProcessId;\r
534                                 socket_info->listen.listen_req_param.identifier =\r
535                                         reason->duplicating.identifier;\r
536 \r
537                                 ret = ib_listen( socket_info );\r
538                                 if( !ret )\r
539                                 {\r
540                                         /* We changed the state - remove from connection map. */\r
541                                         ibsp_conn_remove( socket_info );\r
542                                         break;\r
543                                 }\r
544 \r
545                                 IBSP_ERROR_EXIT( ("ib_listen failed with %d\n", ret) );\r
546                                 /* Fall through. */\r
547                         }\r
548                 default:\r
549                         /* Right now, treat anything as a normal disconnect. */\r
550                         IBSP_CHANGE_SOCKET_STATE( socket_info, IBSP_DISCONNECTED );\r
551                         /*\r
552                          * DISCONNECTED is a terminal state.  We'll remove the connection\r
553                          * when the socket gets destroyed.\r
554                          */\r
555                         socket_info->qp_error = WSAECONNRESET;\r
556                 }\r
557 \r
558                 memset( &cm_drep, 0, sizeof(cm_drep) );\r
559 \r
560                 status = ib_cm_drep( p_cm_dreq_rec->h_cm_dreq, &cm_drep );\r
561                 if( status != IB_SUCCESS )\r
562                         IBSP_ERROR( ("ib_cm_drep returned %s\n", ib_get_err_str( status )) );\r
563         }\r
564         cl_spinlock_release( &socket_info->mutex1 );\r
565 \r
566         IBSP_EXIT( IBSP_DBG_CM );\r
567 }\r
568 \r
569 \r
570 /*\r
571  * A user-specified callback that is invoked after receiving a disconnect\r
572  *      reply message.\r
573  */\r
574 static void AL_API\r
575 cm_drep_callback(\r
576         IN                              ib_cm_drep_rec_t                        *p_cm_drep_rec )\r
577 {\r
578         IBSP_ENTER( IBSP_DBG_CM );\r
579         UNUSED_PARAM( p_cm_drep_rec );\r
580         IBSP_EXIT( IBSP_DBG_CM );\r
581 }\r
582 \r
583 \r
584 /*\r
585  * A user-specified callback that is invoked after an error has occurred on\r
586  * a listen request.\r
587  */\r
588 static void AL_API\r
589 listen_err_callback(\r
590         IN                              ib_listen_err_rec_t                     *p_listen_err_rec )\r
591 {\r
592         /* TODO */\r
593         IBSP_ENTER( IBSP_DBG_CM );\r
594 \r
595         UNUSED_PARAM( p_listen_err_rec );\r
596 \r
597         IBSP_ERROR( ("not implemented") );\r
598 \r
599         CL_ASSERT( 0 );\r
600 \r
601         IBSP_EXIT( IBSP_DBG_CM );\r
602 }\r
603 \r
604 \r
605 /*\r
606  * A user-specified callback that is invoked after receiving a load\r
607  * alternate path response message.\r
608  */\r
609 static void AL_API\r
610 cm_apr_callback(\r
611         IN                              ib_cm_apr_rec_t                         *p_cm_apr_rec )\r
612 {\r
613         /* TODO */\r
614         IBSP_ENTER( IBSP_DBG_CM );\r
615 \r
616         UNUSED_PARAM( p_cm_apr_rec );\r
617 \r
618         IBSP_ERROR( ("not implemented") );\r
619 \r
620         CL_ASSERT( 0 );\r
621 \r
622         IBSP_EXIT( IBSP_DBG_CM );\r
623 }\r
624 \r
625 \r
626 /*\r
627  * A user-specified callback that is invoked after receiving a load\r
628  * alternate path message.\r
629  *\r
630  * SYNOPSIS\r
631  */\r
632 static void AL_API\r
633 cm_lap_callback(\r
634         IN                              ib_cm_lap_rec_t                         *p_cm_lap_rec )\r
635 {\r
636         /* TODO */\r
637         IBSP_ENTER( IBSP_DBG_CM );\r
638 \r
639         UNUSED_PARAM( p_cm_lap_rec );\r
640 \r
641         IBSP_ERROR( ("not implemented") );\r
642 \r
643         CL_ASSERT( 0 );\r
644 \r
645         IBSP_EXIT( IBSP_DBG_CM );\r
646 }\r
647 \r
648 \r
649 /* Listen for an incoming connection. */\r
650 int\r
651 ib_listen(\r
652         IN                              struct ibsp_socket_info         *socket_info )\r
653 {\r
654         ib_cm_listen_t param;\r
655         ib_api_status_t status;\r
656 \r
657         IBSP_ENTER( IBSP_DBG_CM );\r
658 \r
659         memset( &param, 0, sizeof(param) );\r
660 \r
661         param.svc_id = get_service_id_for_port( socket_info->local_addr.sin_port );\r
662         if( socket_info->port )\r
663         {\r
664                 /* The socket is bound to an IP address */\r
665                 param.ca_guid = socket_info->port->hca->guid;\r
666                 param.port_guid = socket_info->port->guid;\r
667         }\r
668         else\r
669         {\r
670                 /* The socket is bound to INADDR_ANY */\r
671                 param.ca_guid = IB_ALL_CAS;\r
672                 param.port_guid = IB_ALL_PORTS;\r
673         }\r
674         param.lid = IB_ALL_LIDS;\r
675 \r
676         param.p_compare_buffer = (uint8_t *) & socket_info->listen.listen_req_param;\r
677         param.compare_length = sizeof(struct listen_req_param);\r
678         param.compare_offset = offsetof(struct cm_req_params, listen_req_param);\r
679 \r
680         fzprint(("%s():%d:0x%x:0x%x: socket=0x%p params: %x %x\n", __FUNCTION__,\r
681                          __LINE__, GetCurrentProcessId(),\r
682                          GetCurrentThreadId(), socket_info,\r
683                          socket_info->listen.listen_req_param.dwProcessId,\r
684                          socket_info->listen.listen_req_param.identifier));\r
685 \r
686         param.pfn_cm_req_cb = cm_req_callback;\r
687 \r
688         param.qp_type = IB_QPT_RELIABLE_CONN;\r
689 \r
690         status = ib_cm_listen( g_ibsp.al_handle, &param, listen_err_callback, socket_info,      /* context */\r
691                 &socket_info->listen.handle );\r
692 \r
693         if( status != IB_SUCCESS )\r
694         {\r
695                 IBSP_ERROR_EXIT( ("ib_cm_listen failed (0x%d)\n", status) );\r
696                 return ibal_to_wsa_error( status );\r
697         }\r
698 \r
699         STAT_INC( listen_num );\r
700 \r
701         IBSP_PRINT_EXIT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM,\r
702                 ("started listening for port %d\n",\r
703                 CL_HTON16( socket_info->local_addr.sin_port )) );\r
704 \r
705         return 0;\r
706 }\r
707 \r
708 \r
709 /* Reject all the queued incoming connection requests. */\r
710 void\r
711 ib_listen_backlog(\r
712         IN                              struct ibsp_socket_info         *socket_info,\r
713         IN                              int                                                     backlog )\r
714 {\r
715         cl_list_item_t *item;\r
716         struct listen_incoming *incoming;\r
717 \r
718         socket_info->listen.backlog = backlog;\r
719 \r
720         while(\r
721                 cl_qlist_count( &socket_info->listen.list ) > (uint32_t)backlog )\r
722         {\r
723                 item = cl_qlist_remove_tail( &socket_info->listen.list );\r
724 \r
725                 incoming = PARENT_STRUCT(item, struct listen_incoming, item);\r
726 \r
727                 ib_reject( incoming->cm_req_received.h_cm_req, IB_REJ_USER_DEFINED );\r
728 \r
729                 HeapFree( g_ibsp.heap, 0, incoming );\r
730         }\r
731 }\r
732 \r
733 \r
734 /* Stop listening on the socket. */\r
735 void\r
736 ib_listen_cancel(\r
737         IN                              struct ibsp_socket_info         *socket_info )\r
738 {\r
739         ib_api_status_t status;\r
740 \r
741         IBSP_ENTER( IBSP_DBG_CM );\r
742 \r
743         status = ib_cm_cancel( socket_info->listen.handle, ib_sync_destroy );\r
744         if( status )\r
745         {\r
746                 IBSP_ERROR( (\r
747                         "ib_cm_cancel returned %s\n", ib_get_err_str( status )) );\r
748         }\r
749         else\r
750         {\r
751                 STAT_DEC( listen_num );\r
752         }\r
753 \r
754         /* We can empty the queue now. Since we are closing, \r
755          * no new entry will be added. */\r
756         cl_spinlock_acquire( &socket_info->mutex1 );\r
757         ib_listen_backlog( socket_info, 0 );\r
758         cl_spinlock_release( &socket_info->mutex1 );\r
759 \r
760         socket_info->listen.handle = NULL;\r
761 \r
762         IBSP_EXIT( IBSP_DBG_CM );\r
763 }\r
764 \r
765 \r
766 int\r
767 ib_connect(\r
768         IN                              struct ibsp_socket_info         *socket_info,\r
769         IN                              ib_path_rec_t                           *path_rec )\r
770 {\r
771         ib_cm_req_t cm_req;\r
772         ib_api_status_t status;\r
773         struct cm_req_params params;\r
774 \r
775         IBSP_ENTER( IBSP_DBG_CM );\r
776 \r
777         fzprint(("%s():%d:0x%x:0x%x: socket=0x%p \n", __FUNCTION__,\r
778                          __LINE__, GetCurrentProcessId(), GetCurrentThreadId(), socket_info));\r
779 \r
780         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM, ("From:\n") );\r
781         DebugPrintSockAddr( IBSP_DBG_CM, &socket_info->local_addr );\r
782         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM, ("To:\n") );\r
783         DebugPrintSockAddr( IBSP_DBG_CM, &socket_info->peer_addr );\r
784 \r
785         /* Insert into the connection map. */\r
786         if( !ibsp_conn_insert( socket_info ) )\r
787         {\r
788                 IBSP_EXIT( IBSP_DBG_CM );\r
789                 return WSAEADDRINUSE;\r
790         }\r
791 \r
792         memset( &cm_req, 0, sizeof(cm_req) );\r
793 \r
794         cm_req.svc_id = get_service_id_for_port( socket_info->peer_addr.sin_port );\r
795         cm_req.max_cm_retries = g_max_cm_retries;\r
796         cm_req.p_primary_path = path_rec;\r
797         cm_req.pfn_cm_rep_cb = cm_rep_callback;\r
798 \r
799         cm_req.p_req_pdata = (uint8_t *) & params;\r
800         params.source = socket_info->local_addr;\r
801         params.dest = socket_info->peer_addr;\r
802         params.listen_req_param.dwProcessId = socket_info->duplicate.dwProcessId;\r
803         params.listen_req_param.identifier = socket_info->duplicate.identifier;\r
804 \r
805         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM,\r
806                 ("ib_connect listen params: %x \n", params.listen_req_param.dwProcessId\r
807                 /*params.listen_req_param.identifier*/));\r
808         IBSP_PRINT(TRACE_LEVEL_INFORMATION, IBSP_DBG_CM,\r
809                 ("connecting to port %d, SID=%016I64x\n", socket_info->peer_addr.sin_port,\r
810                 cm_req.svc_id) );\r
811 \r
812         cm_req.req_length = sizeof(struct cm_req_params);\r
813 \r
814         cm_req.qp_type = IB_QPT_RELIABLE_CONN;\r
815         cm_req.h_qp = socket_info->qp;\r
816         cm_req.resp_res = QP_ATTRIB_RESPONDER_RESOURCES;\r
817         cm_req.init_depth = QP_ATTRIB_INITIATOR_DEPTH;\r
818 \r
819         cm_req.remote_resp_timeout =\r
820                 ib_path_rec_pkt_life( path_rec ) + CM_REMOTE_TIMEOUT;\r
821         if( cm_req.remote_resp_timeout > 0x1F )\r
822                 cm_req.remote_resp_timeout = 0x1F;\r
823         else if( cm_req.remote_resp_timeout < CM_MIN_REMOTE_TIMEOUT )\r
824                 cm_req.remote_resp_timeout = CM_MIN_REMOTE_TIMEOUT;\r
825 \r
826         cm_req.flow_ctrl = TRUE;        /* HCAs must support end-to-end flow control. */\r
827 \r
828         cm_req.local_resp_timeout =\r
829                 ib_path_rec_pkt_life( path_rec ) + CM_LOCAL_TIMEOUT;\r
830         if( cm_req.local_resp_timeout > 0x1F )\r
831                 cm_req.local_resp_timeout = 0x1F;\r
832         else if( cm_req.local_resp_timeout < CM_MIN_LOCAL_TIMEOUT )\r
833                 cm_req.local_resp_timeout = CM_MIN_LOCAL_TIMEOUT;\r
834 \r
835         cm_req.rnr_nak_timeout = QP_ATTRIB_RNR_NAK_TIMEOUT;\r
836         cm_req.rnr_retry_cnt = QP_ATTRIB_RNR_RETRY;\r
837         cm_req.retry_cnt = g_qp_retries;\r
838         cm_req.p_alt_path = NULL;\r
839         cm_req.pfn_cm_mra_cb = cm_mra_callback;\r
840         cm_req.pfn_cm_rej_cb = cm_rej_callback;\r
841 \r
842         status = ib_cm_req( &cm_req );\r
843         if( status != IB_SUCCESS )\r
844         {\r
845                 /* Remove from connection map. */\r
846                 ibsp_conn_remove( socket_info );\r
847 \r
848                 IBSP_ERROR_EXIT( ("ib_cm_req failed (0x%d)\n", status) );\r
849                 return WSAEHOSTUNREACH;\r
850         }\r
851 \r
852         IBSP_EXIT( IBSP_DBG_CM );\r
853         /* Operation is pending */\r
854         return WSAEWOULDBLOCK;\r
855 }\r
856 \r
857 \r
858 void\r
859 ib_reject(\r
860         IN                              const ib_cm_handle_t            h_cm,\r
861         IN                              const ib_rej_status_t           rej_status )\r
862 {\r
863         ib_cm_rej_t cm_rej;\r
864         ib_api_status_t status;\r
865 \r
866         IBSP_ENTER( IBSP_DBG_CM );\r
867 \r
868         memset( &cm_rej, 0, sizeof(cm_rej) );\r
869         cm_rej.rej_status = rej_status;\r
870 \r
871         status = ib_cm_rej( h_cm, &cm_rej );\r
872         if( status != IB_SUCCESS )\r
873                 IBSP_ERROR( ("ib_cm_rej returned %s\n", ib_get_err_str( status )) );\r
874 \r
875         IBSP_EXIT( IBSP_DBG_CM );\r
876 }\r
877 \r
878 \r
879 int\r
880 ib_accept(\r
881         IN                              struct ibsp_socket_info         *socket_info,\r
882         IN                              ib_cm_req_rec_t                         *cm_req_received )\r
883 {\r
884         ib_cm_rep_t cm_rep;\r
885         ib_api_status_t status;\r
886 \r
887         IBSP_ENTER( IBSP_DBG_CM );\r
888 \r
889         /* Insert into the connection map. */\r
890         if( !ibsp_conn_insert( socket_info ) )\r
891         {\r
892                 IBSP_EXIT( IBSP_DBG_CM );\r
893                 return WSAEADDRINUSE;\r
894         }\r
895 \r
896         memset( &cm_rep, 0, sizeof(cm_rep) );\r
897 \r
898         cm_rep.qp_type = IB_QPT_RELIABLE_CONN;\r
899         cm_rep.h_qp = socket_info->qp;\r
900         cm_rep.access_ctrl = IB_AC_RDMA_READ | IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;\r
901 #if 0\r
902         // Bug in TAVOR\r
903         cm_rep.sq_depth = QP_ATTRIB_SQ_DEPTH;\r
904         cm_rep.rq_depth = QP_ATTRIB_RQ_DEPTH;\r
905 #endif\r
906         cm_rep.init_depth = QP_ATTRIB_INITIATOR_DEPTH;\r
907         cm_rep.target_ack_delay = 10;\r
908         cm_rep.failover_accepted = IB_FAILOVER_ACCEPT_UNSUPPORTED;\r
909         cm_rep.flow_ctrl = cm_req_received->flow_ctrl;\r
910         cm_rep.rnr_nak_timeout = QP_ATTRIB_RNR_NAK_TIMEOUT;\r
911         cm_rep.rnr_retry_cnt = cm_req_received->rnr_retry_cnt;\r
912         cm_rep.pfn_cm_mra_cb = cm_mra_callback;\r
913         cm_rep.pfn_cm_rej_cb = cm_rej_callback;\r
914         cm_rep.pfn_cm_rtu_cb = cm_rtu_callback;\r
915         cm_rep.pfn_cm_lap_cb = cm_lap_callback;\r
916         cm_rep.pfn_cm_dreq_cb = cm_dreq_callback;\r
917 \r
918         fzprint(("%s():%d:0x%x:0x%x: flow_ctrl=%d rnr_retry_cnt=%d\n", __FUNCTION__,\r
919                          __LINE__, GetCurrentProcessId(),\r
920                          GetCurrentThreadId(), cm_rep.flow_ctrl, cm_rep.rnr_retry_cnt));\r
921 \r
922         status = ib_cm_rep( cm_req_received->h_cm_req, &cm_rep );\r
923         if( status != IB_SUCCESS )\r
924         {\r
925                 /* Remove from connection map. */\r
926                 ibsp_conn_remove( socket_info );\r
927 \r
928                 IBSP_ERROR_EXIT( ("ib_cm_rep failed (0x%s) at time %I64d\n",\r
929                         ib_get_err_str( status ), cl_get_time_stamp()) );\r
930                 return WSAEACCES;\r
931         }\r
932 \r
933         IBSP_EXIT( IBSP_DBG_CM );\r
934         return 0;\r
935 }\r
936 \r
937 \r
938 void\r
939 ib_disconnect(\r
940         IN                              struct ibsp_socket_info         *socket_info,\r
941         IN                              struct disconnect_reason        *reason )\r
942 {\r
943         ib_api_status_t         status;\r
944         ib_cm_dreq_t            cm_dreq;\r
945 \r
946         IBSP_ENTER( IBSP_DBG_CM );\r
947 \r
948         memset( &cm_dreq, 0, sizeof(cm_dreq) );\r
949 \r
950         cm_dreq.qp_type = IB_QPT_RELIABLE_CONN;\r
951         cm_dreq.h_qp = socket_info->qp;\r
952         cm_dreq.pfn_cm_drep_cb = cm_drep_callback;\r
953 \r
954         cm_dreq.p_dreq_pdata = (uint8_t *) reason;\r
955         cm_dreq.dreq_length = sizeof(struct disconnect_reason);\r
956 \r
957         status = ib_cm_dreq( &cm_dreq );\r
958 \r
959         /*\r
960          * If both sides initiate disconnection, we might get\r
961          * an invalid state or handle here.\r
962          */\r
963         if( status != IB_SUCCESS && status != IB_INVALID_STATE &&\r
964                 status != IB_INVALID_HANDLE )\r
965         {\r
966                 IBSP_ERROR( ("ib_cm_dreq returned %s\n", ib_get_err_str( status )) );\r
967         }\r
968 \r
969         /*\r
970          * Note that we don't care about getting the DREP - we move the QP to the\r
971          * error state now and flush all posted work requests.  If the\r
972          * disconnection was graceful, we'll only have the pre-posted receives to\r
973          * flush.  If the disconnection is ungraceful, we don't care if we\r
974          * interrupt transfers.\r
975          */\r
976 \r
977         /* Move the QP to error to flush any work requests. */\r
978         __flush_qp( socket_info );\r
979 \r
980         IBSP_EXIT( IBSP_DBG_CM );\r
981 }\r