/*
 * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <iba/ib_al.h>

#include "al_cm_shared.h"
#include "al_debug.h"

#if defined(EVENT_TRACING)
#include "al_cm.tmh"
#endif

#include "ib_common.h"
/*
 * Note: The running comm ID is a global number in this implementation.
 */
atomic32_t local_comm_id = 1;
/*
 * The global CM object has a lock, as do connection objects.  It is OK
 * to acquire the global CM object's lock when already holding a connection
 * object's lock.  This means that it is not possible to acquire a connection's
 * lock after acquiring the global CM's lock.  Connection objects provide
 * reference counting to allow releasing the global CM's lock and acquiring
 * the connection's lock.
 *
 * Callbacks in which users are expected to call a CM function hold a reference
 * on the connection object until the expected call is made.  These include:
 *    REQ callback - expect REP or REJ
 *    REP callback - expect RTU or REJ
 *    LAP callback - expect APR
 *    DREQ callback - expect DREP
 * The following callbacks do not expect a user call:
 *    RTU callback - connection is established.
 *    DREP callback - connection is released.
 *    APR callback - LAP transaction is complete.
 *    MRA callback - notification only.
 *
 * Connections are stored in the connection map once established, using a
 * 64-bit key generated as (local_comm_id << 32 | remote_comm_id) when using
 * the connection as the data source, or (remote_comm_id << 32 | local_comm_id)
 * when using a received MAD as the source.
 */
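/*
 * Illustrative sketch only (not part of the original source): one way the
 * 64-bit connection map key described above could be formed.  The helper name
 * __make_conn_key is hypothetical and is not used elsewhere in this file.
 */
#if 0
static uint64_t
__make_conn_key(
    IN const net32_t id_high,
    IN const net32_t id_low )
{
    /* The caller passes local/remote IDs in the order appropriate to its source. */
    return ( ((uint64_t)id_high) << 32 ) | id_low;
}
#endif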
/* Global instance of the CM agent. */
al_cm_agent_t *gp_cm = NULL;
static void
__parse_cm_info(void);

static void
__process_cm_send_comp(
    IN cl_async_proc_item_t *p_item );
/* debug functions */
static void
__parse_cm_info(void)
{
#if defined( _DEBUG_ )
    cl_list_item_t *p_item;
    al_listen_t *p_listen;
    al_conn_t *p_conn;

    AL_ENTER( AL_DBG_CM );

    cl_spinlock_acquire( &gp_cm->obj.lock );
    AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM,
        ("\tactive_listen_list..: %"PRIdSIZE_T"\n"
        "\tinactive_listen_list: %"PRIdSIZE_T"\n"
        "\tpending_list........: %"PRIdSIZE_T"\n"
        "\ttime_wait_list......: %"PRIdSIZE_T"\n"
        "\tconn_pool...........: %"PRIdSIZE_T"\n"
        "\treq_pool............: %"PRIdSIZE_T"\n"
        "\tconn_map............: %"PRIdSIZE_T"\n",
        cl_qlist_count( &gp_cm->active_listen_list ),
        cl_qlist_count( &gp_cm->inactive_listen_list ),
        cl_qlist_count( &gp_cm->pending_list ),
        cl_qlist_count( &gp_cm->time_wait_list ),
        cl_qpool_count( &gp_cm->conn_pool ),
        cl_pool_count( &gp_cm->req_pool ),
        cl_qmap_count( &gp_cm->conn_map ) ) );

    /* Walk the lists for connection info. */
    AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM, ("Active listens:\n") );
    for( p_item = cl_qlist_head( &gp_cm->active_listen_list );
        p_item != cl_qlist_end( &gp_cm->active_listen_list );
        p_item = cl_qlist_next( p_item ) )
    {
        p_listen = PARENT_STRUCT( p_item, al_listen_t, list_item );

        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM,
            ("\tservice_id...:0x%I64x\n"
            "\tlid..........:0x%x\n"
            "\tport_guid....:0x%I64x\n"
            "\tqp_type......:%d\n",
            p_listen->info.svc_id, p_listen->info.lid,
            p_listen->info.port_guid, p_listen->info.qp_type ) );
    }

    AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM, ("Inactive listens:\n") );
    for( p_item = cl_qlist_head( &gp_cm->inactive_listen_list );
        p_item != cl_qlist_end( &gp_cm->inactive_listen_list );
        p_item = cl_qlist_next( p_item ) )
    {
        p_listen = PARENT_STRUCT( p_item, al_listen_t, list_item );

        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM,
            ("\tservice_id...:0x%I64x\n"
            "\tlid..........:0x%x\n"
            "\tport_guid....:0x%I64x\n"
            "\tqp_type......:%d\n",
            p_listen->info.svc_id, p_listen->info.lid,
            p_listen->info.port_guid, p_listen->info.qp_type ) );
    }

    AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM, ("In Timewait:\n") );
    for( p_item = cl_qlist_head( &gp_cm->time_wait_list );
        p_item != cl_qlist_end( &gp_cm->time_wait_list );
        p_item = cl_qlist_next( p_item ) )
    {
        p_conn = PARENT_STRUCT( p_item, al_conn_t, map_item );

        AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_CM,
            ("\tstate........:%d\n"
            "\tref_cnt......:%d\n"
            "\tlocal_id.....:0x%x\n"
            "\tremote_id....:0x%x\n"
            "\tlocal_qpn....:0x%x\n"
            "\tremote_qpn...:0x%x\n",
            p_conn->state, p_conn->ref_cnt, p_conn->local_comm_id,
            p_conn->remote_comm_id,
            cl_ntoh32(p_conn->local_qpn),
            cl_ntoh32(p_conn->remote_qpn) ) );
    }

    cl_spinlock_release( &gp_cm->obj.lock );

    AL_EXIT( AL_DBG_CM );
#endif	/* defined( _DEBUG_ ) */
}
/*
 * PnP callback invoked when deregistration is complete.
 */
/***static***/ void
__cm_pnp_dereg_cb(
    IN void *context )
{
    AL_ENTER( AL_DBG_CM );

    UNUSED_PARAM( context );

    deref_al_obj( &gp_cm->obj );

    AL_EXIT( AL_DBG_CM );
}
/*
 * Starts immediate cleanup of the CM.  Invoked during al_obj destruction.
 */
/***static***/ void
__destroying_cm(
    IN al_obj_t *p_obj )
{
    ib_api_status_t status;

    AL_ENTER( AL_DBG_CM );

    CL_ASSERT( &gp_cm->obj == p_obj );
    UNUSED_PARAM( p_obj );

    /* Deregister from PnP notifications. */
    status = ib_dereg_pnp( gp_cm->h_pnp, __cm_pnp_dereg_cb );
    if( status != IB_SUCCESS )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_dereg_pnp failed with status %s.\n",
            ib_get_err_str(status)) );
        deref_al_obj( &gp_cm->obj );
    }

    /* Cancel all timewait timers. */

    AL_EXIT( AL_DBG_CM );
}
/*
 * Frees the global CM agent.  Invoked during al_obj destruction.
 */
static void
__free_cm(
    IN al_obj_t *p_obj )
{
    AL_ENTER( AL_DBG_CM );

    CL_ASSERT( &gp_cm->obj == p_obj );

    /* All listen requests should have been cleaned up by this point. */
    CL_ASSERT( cl_is_qlist_empty( &gp_cm->active_listen_list ) );
    CL_ASSERT( cl_is_qlist_empty( &gp_cm->inactive_listen_list ) );

    /* All connections should have been cancelled/disconnected by now. */
    CL_ASSERT( cl_is_qlist_empty( &gp_cm->pending_list ) );
    CL_ASSERT( cl_is_qmap_empty( &gp_cm->conn_map ) );

    //***TODO: Figure out the right handling of the time wait state.
    //***TODO: Must cancel all timewait timers.
    /* Return all connection objects in the time-wait state to their pool. */
    cl_qpool_put_list( &gp_cm->conn_pool, &gp_cm->time_wait_list );

    /*
     * All CM port agents should have been destroyed by now via the
     * standard child object destruction provided by the al_obj.
     */
    cl_qpool_destroy( &gp_cm->conn_pool );
    cl_pool_destroy( &gp_cm->req_pool );
    destroy_al_obj( p_obj );

    AL_EXIT( AL_DBG_CM );
}
/*
 * Performs immediate cleanup of resources.
 */
/***static***/ void
__destroying_port_cm(
    IN al_obj_t *p_obj )
{
    cm_port_agent_t *p_port_cm;

    AL_ENTER( AL_DBG_CM );

    p_port_cm = PARENT_STRUCT( p_obj, cm_port_agent_t, obj );

    if( p_port_cm->h_qp )
    {
        ib_destroy_qp( p_port_cm->h_qp, (ib_pfn_destroy_cb_t)deref_al_obj );
        p_port_cm->h_qp = NULL;
    }

    if( p_port_cm->h_pd )
    {
        ib_dealloc_pd( p_port_cm->h_pd, (ib_pfn_destroy_cb_t)deref_al_obj );
        p_port_cm->h_pd = NULL;
    }

    AL_EXIT( AL_DBG_CM );
}
/*
 * Releases all resources allocated by a port CM agent.  Finishes any cleanup
 * for a port agent.
 */
/***static***/ void
__free_port_cm(
    IN al_obj_t *p_obj )
{
    cm_port_agent_t *p_port_cm;

    AL_ENTER( AL_DBG_CM );

    p_port_cm = PARENT_STRUCT( p_obj, cm_port_agent_t, obj );

    CL_ASSERT( cl_is_qlist_empty( &p_port_cm->av_list ) );

    if( p_port_cm->h_ca )
        deref_al_obj( &p_port_cm->h_ca->obj );

    cl_spinlock_destroy( &p_port_cm->lock );
    destroy_al_obj( &p_port_cm->obj );
    cl_free( p_port_cm );

    AL_EXIT( AL_DBG_CM );
}
static ib_api_status_t
__get_av(
    IN cm_port_agent_t* const p_port_cm,
    IN ib_mad_element_t *p_mad )
{
    ib_api_status_t status = IB_SUCCESS;
    ib_av_handle_t h_av;
    ib_av_attr_t av_attr;
    cl_list_item_t *p_list_item;

    AL_ENTER( AL_DBG_CM );

    cl_spinlock_acquire( &p_port_cm->lock );

    /* Search for an existing AV that can be used. */
    for( p_list_item = cl_qlist_head( &p_port_cm->av_list );
        p_list_item != cl_qlist_end( &p_port_cm->av_list );
        p_list_item = cl_qlist_next( p_list_item ) )
    {
        h_av = PARENT_STRUCT( p_list_item, ib_av_t, list_item );

        /* verify sl level */
        if( p_mad->remote_sl != h_av->av_attr.sl )
            continue;

        if( p_mad->grh_valid == FALSE )
        {
            /* verify source */
            if( p_mad->path_bits != h_av->av_attr.path_bits )
                continue;

            /* verify destination */
            if( p_mad->remote_lid != h_av->av_attr.dlid )
                continue;
        }
        else
        {
            /* verify source */
            if( cl_memcmp( &p_mad->p_grh->src_gid.raw,
                &h_av->av_attr.grh.src_gid.raw,
                sizeof(ib_gid_t) ) )
                continue;

            /* verify destination */
            if( cl_memcmp( &p_mad->p_grh->dest_gid.raw,
                &h_av->av_attr.grh.dest_gid.raw,
                sizeof(ib_gid_t) ) )
                continue;
        }

        /* Matching AV found - take a reference and reuse it. */
        ref_al_obj( &h_av->obj );
        p_mad->h_av = h_av;
        break;
    }
    cl_spinlock_release( &p_port_cm->lock );

    if( p_list_item == cl_qlist_end( &p_port_cm->av_list ) )
    {
        /* Create a new AV. */
        /* format av info */
        cl_memclr( &av_attr, sizeof(ib_av_attr_t) );
        av_attr.sl = p_mad->remote_sl;
        av_attr.static_rate = IB_PATH_RECORD_RATE_10_GBS;
        av_attr.path_bits = p_mad->path_bits;
        av_attr.dlid = p_mad->remote_lid;
        av_attr.port_num = (uint8_t)( p_port_cm->port_idx + 1 );
        av_attr.grh_valid = p_mad->grh_valid;
        if( p_mad->grh_valid == TRUE )
            av_attr.grh = *p_mad->p_grh;

        status = ib_create_av( p_port_cm->h_pd, &av_attr, &h_av );
        if( status == IB_SUCCESS )
        {
            /* take a reference on the object and add it to the AV list */
            ref_al_obj( &h_av->obj );
            p_mad->h_av = h_av;
            cl_spinlock_acquire( &p_port_cm->lock );
            cl_qlist_insert_tail( &p_port_cm->av_list, &h_av->list_item );
            cl_spinlock_release( &p_port_cm->lock );
        }
    }

    AL_EXIT( AL_DBG_CM );
    return status;
}
static void
__put_av(
    IN cm_port_agent_t* const p_port_cm,
    IN ib_mad_element_t *p_mad )
{
    ib_av_handle_t h_av;
    int32_t ref_cnt;

    AL_ENTER( AL_DBG_CM );

    h_av = p_mad->h_av;

    /* Remove the reference on the AL object. */
    cl_spinlock_acquire( &p_port_cm->lock );
    ref_cnt = deref_al_obj( &h_av->obj );
    CL_ASSERT( ref_cnt >= 1 );
    if( ref_cnt == 1 )
    {
        /* Only the list's reference remains - remove and destroy the AV. */
        cl_qlist_remove_item( &p_port_cm->av_list, &h_av->list_item );
        cl_spinlock_release( &p_port_cm->lock );

        ib_destroy_av( h_av );
    }
    else
    {
        cl_spinlock_release( &p_port_cm->lock );
    }

    AL_EXIT( AL_DBG_CM );
}
    IN const ib_al_handle_t h_al,
    IN const ib_qp_type_t qp_type )
{
    cl_pool_item_t *p_item;
    al_conn_t *p_conn;
    uint8_t counter;

    AL_ENTER( AL_DBG_CM );
    CL_ASSERT( h_al && h_al->obj.type == AL_OBJ_TYPE_H_AL );

    cl_spinlock_acquire( &gp_cm->obj.lock );
    p_item = cl_qpool_get( &gp_cm->conn_pool );
    if( !p_item )
    {
        cl_spinlock_release( &gp_cm->obj.lock );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cl_qpool_get failed for al_conn_t.\n") );
    }

    p_conn = PARENT_STRUCT( p_item, al_conn_t, map_item );

    p_conn->p_req_info = cl_pool_get( &gp_cm->req_pool );
    if( !p_conn->p_req_info )
    {
        cl_qpool_put( &gp_cm->conn_pool, (cl_pool_item_t*)&p_conn->map_item );
        cl_spinlock_release( &gp_cm->obj.lock );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cl_pool_get failed for conn_req_t.\n") );
    }
    cl_spinlock_release( &gp_cm->obj.lock );

    /*
     * Increment the counter in the comm ID to meet the REJ retry requirements
     * of IB spec section 12.9.8.7.
     */
    counter = (uint8_t)(p_conn->local_comm_id >> 24) + 1;
    p_conn->local_comm_id &= CM_CONN_POOL_MAX_MASK;
    p_conn->local_comm_id |= (((net32_t)counter) << 24);
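    /*
     * Example (assuming CM_CONN_POOL_MAX_MASK preserves the low 24 bits):
     * a recycled ID of 0x12345678 has its top byte bumped from 0x12 to 0x13
     * and is reissued as 0x13345678, so a REJ for a stale request cannot be
     * mistaken for a response to the new one.
     */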
    /* Clear out the request information. */
    cl_memclr( p_conn->p_req_info, sizeof(conn_req_t) );
    p_conn->p_req_info->qp_mod_rtr.req_state = IB_QPS_RTR;
    p_conn->p_req_info->qp_mod_rts.req_state = IB_QPS_RTS;
    p_conn->p_req_info->port_idx = (uint8_t)-1;
    p_conn->p_req_info->alt_port_idx = (uint8_t)-1;

    /* Initialize the connection parameters. */
    p_conn->ref_cnt = 1;

    /* Set the version to the highest known version. */
    p_conn->p_req_info->class_ver = IB_MCLASS_CM_VER_2;

    p_conn->qp_type = qp_type;
    p_conn->p_req_info->xport_type = qp_type;

    p_conn->h_qp = NULL;
    p_conn->local_qpn = 0;
    p_conn->remote_qpn = 0;

    /* Initialize event-based logic for synchronous operations. */
    p_conn->p_sync_event = NULL;

    p_conn->pfn_cm_apr_cb = NULL;
    p_conn->pfn_cm_drep_cb = NULL;
    p_conn->pfn_cm_dreq_cb = NULL;
    p_conn->pfn_cm_lap_cb = NULL;

    p_conn->remote_ca_guid = 0;
    p_conn->remote_comm_id = 0;

    cl_memclr( &p_conn->path, sizeof( ib_path_rec_t ) * 2 );

    p_conn->state = CM_CONN_RESET;

    p_conn->was_active = TRUE;
    p_conn->target_ack_delay = 0;

    /*
     * Initialize the MAD header.  The only field that still needs to be set
     * is the attribute ID.
     */
    p_conn->mads.hdr.attr_mod = 0;
    p_conn->mads.hdr.base_ver = 1;
    p_conn->mads.hdr.class_spec = 0;
    p_conn->mads.hdr.method = IB_MAD_METHOD_SEND;
    p_conn->mads.hdr.mgmt_class = IB_MCLASS_COMM_MGMT;
    p_conn->mads.hdr.resv = 0;
    p_conn->mads.hdr.status = 0;
    p_conn->mads.hdr.trans_id = (ib_net64_t)cl_hton32( p_conn->local_comm_id );

    /* The class version is set to the user's request. */
    p_conn->mads.hdr.class_ver = IB_MCLASS_CM_VER_2;

    al_insert_conn( h_al, p_conn );
    if( p_conn->hdl == AL_INVALID_HANDLE )
    {
        al_remove_conn( p_conn );
        cl_spinlock_acquire( &gp_cm->obj.lock );
        cl_pool_put( &gp_cm->req_pool, p_conn->p_req_info );
        p_conn->p_req_info = NULL;
        cl_qpool_put( &gp_cm->conn_pool, (cl_pool_item_t*)&p_conn->map_item );
        cl_spinlock_release( &gp_cm->obj.lock );
        AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("al_insert_conn failed for conn_req_t.\n") );
    }

    ref_al_obj( &gp_cm->obj );
    AL_EXIT( AL_DBG_CM );
}
static void
__ref_conn(
    IN al_conn_t* const p_conn )
{
    AL_ENTER( AL_DBG_CM );

    cl_atomic_inc( &p_conn->ref_cnt );

    AL_EXIT( AL_DBG_CM );
}
    IN const ib_cm_handle_t h_conn )
{
    /*
     * Set the reference count to 1, then decrement it to force cleanup.
     * Note that if we are in this function, the connection object has not
     * been cleaned up properly.  We are simply trying to clean up as best
     * we can.
     */
    h_conn->ref_cnt = 1;
    __deref_conn( h_conn );
}
static void
__deref_conn(
    IN al_conn_t* const p_conn )
{
    ib_qp_handle_t h_qp;

    AL_ENTER( AL_DBG_CM );

    /*
     * Destruction of the connection object and unbinding it from the QP
     * must be atomic.  The QP will try to destroy the connection object
     * when the QP is destroyed.  We use the global CM lock to synchronize
     * changes to the connection's QP handle.
     */
    cl_spinlock_acquire( &gp_cm->obj.lock );
    if( !cl_atomic_dec( &p_conn->ref_cnt ) )
    {
        /* Free the connection request information. */
        __release_req_info( p_conn );

        /*
         * Unbind the QP while holding the CM lock -- see above.
         * Note that we should not have a QP if we are at this point.  The
         * only way this can occur is if we did not properly clean up the
         * QP in some other code path (such as disconnection).
         */
        h_qp = p_conn->h_qp;
        cm_unbind_qp( p_conn );

        /* Remove the connection from AL, if not already done. */
        al_remove_conn( p_conn );

        /*
         * The connection should be in the RESET state.  If it is not, then
         * we have most likely timed out trying to clean up the connection.
         * Attempt to recover from such poor behavior now.
         */
        switch( p_conn->state )
        {
        case CM_CONN_REQ_RCVD:
        case CM_CONN_REQ_SENT:
        case CM_CONN_REQ_MRA_RCVD:
        case CM_CONN_REQ_MRA_SENT:
        case CM_CONN_REP_RCVD:
        case CM_CONN_REP_SENT:
        case CM_CONN_REP_MRA_RCVD:
        case CM_CONN_REP_MRA_SENT:
            /* We're a pending connection. */
            cl_qlist_remove_item( &gp_cm->pending_list,
                (cl_list_item_t*)&p_conn->map_item );
            break;

        case CM_CONN_ESTABLISHED:
        case CM_CONN_LAP_RCVD:
        case CM_CONN_LAP_SENT:
        case CM_CONN_LAP_MRA_RCVD:
        case CM_CONN_LAP_MRA_SENT:
        case CM_CONN_DREQ_SENT:
        case CM_CONN_DREQ_RCVD:
        case CM_CONN_DREP_SENT:
            /* We're still in the connection map. */
            cl_qmap_remove_item( &gp_cm->conn_map, &p_conn->map_item );
            break;

        case CM_CONN_TIMEWAIT:
            /* We're still in the time wait list. */
            cl_qlist_remove_item( &gp_cm->time_wait_list,
                (cl_list_item_t*)&p_conn->map_item );
            break;

        default:
            /* We shouldn't be in any lists or maps. */
            break;
        }

        /* Return the connection object to the free pool. */
        cl_qpool_put( &gp_cm->conn_pool,
            (cl_pool_item_t*)&p_conn->map_item );
        cl_spinlock_release( &gp_cm->obj.lock );

        if( h_qp )
        {
            /* We shouldn't have a QP at this point. */
            AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
                ("QP (%016I64x) still referenced by connection object\n",
                (LONG_PTR)h_qp) );
            cm_reset_qp( h_qp, 0 );
            deref_al_obj( &h_qp->obj );
        }

        deref_al_obj( &gp_cm->obj );
    }
    else
    {
        cl_spinlock_release( &gp_cm->obj.lock );
    }

    AL_EXIT( AL_DBG_CM );
}
static boolean_t
__is_lid_valid(
    IN ib_net16_t lid,
    IN ib_net16_t port_lid,
    IN uint8_t lmc )
{
    uint16_t path_bits;
    uint16_t lid1;
    uint16_t lid2;

    lid1 = CL_NTOH16(lid);
    lid2 = CL_NTOH16(port_lid);

    path_bits = (uint16_t)( (path_bits << 1) | 1 );

    if (lid != port_lid)
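/*
 * Illustrative sketch only (not part of the original source): with LMC set, a
 * port answers to 2^lmc consecutive LIDs starting at its base LID, so a
 * requested LID can be validated against that range roughly as shown below.
 * The helper name and the treatment of a zero LID as a wildcard are
 * assumptions, not code taken from __is_lid_valid above.
 */
#if 0
static boolean_t
__lid_in_lmc_range(
    IN ib_net16_t lid,
    IN ib_net16_t port_lid,
    IN uint8_t lmc )
{
    uint16_t base = CL_NTOH16( port_lid );
    uint16_t mask = (uint16_t)( (1 << lmc) - 1 );

    /* Assume a zero LID means "match the port's base LID". */
    if( !lid )
        return TRUE;

    /* The LIDs match if they agree in all bits above the LMC range. */
    return (boolean_t)( (CL_NTOH16( lid ) & ~mask) == (base & ~mask) );
}
#endif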
    IN cm_port_agent_t* const p_port_cm,
    IN al_conn_t* const p_conn )
{
    ib_mad_element_t *p_mad;
    ib_api_status_t status;
    ib_grh_t grh;

    AL_ENTER( AL_DBG_CM );

    CL_ASSERT( p_conn );

    /* Get a MAD from the pool. */
    status = ib_get_mad( p_port_cm->pool_key, MAD_BLOCK_SIZE, &p_mad );
    if( status != IB_SUCCESS )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_get_mad failed with status %s.\n", ib_get_err_str(status)) );
    }

    /* Set the addressing information in the MAD. */
    if( p_conn->path[p_conn->idx_primary].hop_flow_raw.val )
    {
        cl_memclr( &grh, sizeof( ib_grh_t ) );
        p_mad->p_grh = &grh;
        grh.ver_class_flow = ib_grh_set_ver_class_flow(
            1, p_conn->path[p_conn->idx_primary].tclass,
            ib_path_rec_flow_lbl( &p_conn->path[p_conn->idx_primary] ) );
        grh.hop_limit =
            ib_path_rec_hop_limit( &p_conn->path[p_conn->idx_primary] );
        grh.src_gid = p_conn->path[p_conn->idx_primary].sgid;
        grh.dest_gid = p_conn->path[p_conn->idx_primary].dgid;
    }
    else
    {
        p_mad->p_grh = NULL;
    }

    p_mad->remote_sl =
        ib_path_rec_sl( &p_conn->path[p_conn->idx_primary] );
    p_mad->remote_lid = p_conn->path[p_conn->idx_primary].dlid;
    p_mad->path_bits = 0;
    p_mad->remote_qp = IB_QP1;
    p_mad->send_opt = IB_SEND_OPT_SIGNALED;
    p_mad->remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY;
    switch( p_conn->mads.hdr.attr_id )
    {
    case CM_REQ_ATTR_ID:
    case CM_REP_ATTR_ID:
    case CM_LAP_ATTR_ID:
    case CM_DREQ_ATTR_ID:
        /*
         * REQ, REP, LAP, and DREQ are retried until either a response is
         * received or the operation times out.
         */
        p_mad->resp_expected = TRUE;
        p_mad->retry_cnt = p_conn->max_cm_retries;
        p_mad->timeout_ms = p_conn->retry_timeout;
        break;

    default:
        /*
         * All other CM MADs are sent once, and repeated only if the previous
         * MAD is received again.
         */
        p_mad->resp_expected = FALSE;
        p_mad->retry_cnt = 0;
        p_mad->timeout_ms = 0;
        break;
    }

    /* Copy the MAD contents. */
    cl_memcpy( p_mad->p_mad_buf, &p_conn->mads, MAD_BLOCK_SIZE );

    /* Set the contexts. */
    p_mad->context1 = p_conn;
    p_mad->context2 = NULL;
    /* Reference the connection for which we are sending the MAD. */
    __ref_conn( p_conn );

    /* Store the MAD service handle in the connection for cancelling. */
    p_conn->h_mad_svc = p_port_cm->h_mad_svc;

    /* Get an AV for the send. */
    p_mad->h_av = NULL;
    status = __get_av( p_port_cm, p_mad );
    if( status != IB_SUCCESS )
    {
        __deref_conn( p_conn );
        ib_put_mad( p_mad );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("__get_av failed with status %s.\n", ib_get_err_str(status)) );
    }

    /* Fire in the hole! */
    p_conn->p_send_mad = p_mad;
    status = ib_send_mad( p_port_cm->h_mad_svc, p_mad, NULL );
    if( status != IB_SUCCESS )
    {
        p_conn->p_send_mad = NULL;
        __deref_conn( p_conn );
        ib_put_mad( p_mad );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_send_mad failed with status %s.\n", ib_get_err_str(status)) );
    }

    AL_EXIT( AL_DBG_CM );
}
static ib_api_status_t
__cm_send_mad(
    IN cm_port_agent_t* const p_port_cm,
    IN ib_mad_element_t* const p_mad )
{
    ib_api_status_t status;

    AL_ENTER( AL_DBG_CM );

    CL_ASSERT( p_port_cm );
    CL_ASSERT( p_mad );
    CL_ASSERT( !p_mad->resp_expected );

    /* Use the MAD's addressing attributes as already present. */

    /* Set the contexts. */
    p_mad->context1 = NULL;
    p_mad->context2 = NULL;

    /* Get an AV for the send. */
    status = __get_av( p_port_cm, p_mad );
    if( status != IB_SUCCESS )
    {
        ib_put_mad( p_mad );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("__get_av failed with status %s.\n", ib_get_err_str(status)) );
        return status;
    }

    status = ib_send_mad( p_port_cm->h_mad_svc, p_mad, NULL );
    if( status != IB_SUCCESS )
    {
        ib_put_mad( p_mad );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_send_mad failed with status %s.\n", ib_get_err_str(status)) );
    }

    AL_EXIT( AL_DBG_CM );
    return status;
}
    IN cm_port_agent_t* const p_port_cm,
    IN al_conn_t* const p_conn,
    IN ib_mad_element_t* const p_mad )
{
    ib_api_status_t status;

    AL_ENTER( AL_DBG_CM );

    CL_ASSERT( p_port_cm );
    CL_ASSERT( p_conn );
    CL_ASSERT( p_mad );

    /* Repeat the last MAD sent for the connection. */
    cm_res_acquire( p_conn );
    switch( p_conn->state )
    {
    case CM_CONN_REQ_SENT:
        /*
         * CM_CONN_REQ_SENT is only valid for peer requests that win
         * the peer comparison.
         */
        CL_ASSERT( p_conn->p_req_info->pfn_cm_req_cb );
        /* Fall through. */
    case CM_CONN_REP_SENT:
    case CM_CONN_ESTABLISHED:
    case CM_CONN_DREQ_SENT:
    case CM_CONN_DREP_SENT:
    case CM_CONN_TIMEWAIT:
        cl_memcpy( p_mad->p_mad_buf, &p_conn->mads, sizeof(mad_cm_req_t) );
        p_mad->send_context1 = NULL;
        p_mad->send_context2 = NULL;
        status = __cm_send_mad( p_port_cm, p_mad );
        if( status == IB_SUCCESS )
            break;

        /* Failure.  Fall through. */

    default:
        /* Return the MAD to the MAD pool. */
        ib_put_mad( p_mad );
        break;
    }
    cm_res_release( p_conn );

    AL_EXIT( AL_DBG_CM );
}
static void
__cm_mad_recv_cb(
    IN ib_mad_svc_handle_t h_mad_svc,
    IN void *context,
    IN ib_mad_element_t *p_mad )
{
    ib_mad_t *p_hdr;
    cm_async_mad_t *p_async_mad;

    AL_ENTER( AL_DBG_CM );

    UNUSED_PARAM( h_mad_svc );

    CL_ASSERT( p_mad->p_next == NULL );

    p_hdr = (ib_mad_t*)p_mad->p_mad_buf;

    p_async_mad = (cm_async_mad_t*)cl_zalloc( sizeof(cm_async_mad_t) );
    if( !p_async_mad )
    {
        ib_put_mad( p_mad );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("failed to cl_zalloc cm_async_mad_t (%d bytes)\n",
            sizeof(cm_async_mad_t)) );
        return;
    }

    p_async_mad->p_port_cm = (cm_port_agent_t*)context;
    p_async_mad->p_mad = p_mad;

    switch( p_hdr->attr_id )
    {
    case CM_REQ_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_req;
        break;
    case CM_MRA_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_mra;
        break;
    case CM_REJ_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_rej;
        break;
    case CM_REP_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_rep;
        break;
    case CM_RTU_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_rtu;
        break;
    case CM_DREQ_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_dreq;
        break;
    case CM_DREP_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_drep;
        break;
    case CM_LAP_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_lap;
        break;
    case CM_APR_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_apr;
        break;
    case CM_SIDR_REQ_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_sidr_req;
        break;
    case CM_SIDR_REP_ATTR_ID:
        p_async_mad->item.pfn_callback = __process_cm_sidr_rep;
        break;
    default:
        cl_free( p_async_mad );
        ib_put_mad( p_mad );
        AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("Invalid CM MAD attribute ID.\n") );
        return;
    }

    /* Queue the MAD for asynchronous processing. */
    cl_async_proc_queue( gp_async_proc_mgr, &p_async_mad->item );

    AL_EXIT( AL_DBG_CM );
}
static void
__cm_mad_send_cb(
    IN ib_mad_svc_handle_t h_mad_svc,
    IN void *context,
    IN ib_mad_element_t *p_mad )
{
    cm_async_mad_t *p_async_mad;

    AL_ENTER( AL_DBG_CM );

    UNUSED_PARAM( h_mad_svc );
    CL_ASSERT( p_mad->p_next == NULL );

    p_async_mad = (cm_async_mad_t*)cl_zalloc( sizeof(cm_async_mad_t) );
    if( !p_async_mad )
    {
        ib_put_mad( p_mad );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("failed to cl_zalloc cm_async_mad_t (%d bytes)\n",
            sizeof(cm_async_mad_t)) );
        return;
    }

    p_async_mad->p_port_cm = (cm_port_agent_t*)context;
    p_async_mad->p_mad = p_mad;
    p_async_mad->item.pfn_callback = __process_cm_send_comp;

    /* Queue the MAD for asynchronous processing. */
    cl_async_proc_queue( gp_async_proc_mgr, &p_async_mad->item );

    AL_EXIT( AL_DBG_CM );
}
static void
__process_cm_send_comp(
    IN cl_async_proc_item_t *p_item )
{
    cm_async_mad_t *p_async_mad;
    cm_port_agent_t *p_port_cm;
    al_conn_t *p_conn;
    ib_mad_element_t *p_mad;

    AL_ENTER( AL_DBG_CM );

    p_async_mad = PARENT_STRUCT( p_item, cm_async_mad_t, item );
    p_mad = p_async_mad->p_mad;
    p_port_cm = p_async_mad->p_port_cm;
    cl_free( p_async_mad );

    p_conn = (ib_cm_handle_t)p_mad->context1;
    if( !p_conn )
    {
        /*
         * The connection context is not set when performing immediate
         * responses, such as repeated MADs.
         */
        __put_av( p_port_cm, p_mad );
        ib_put_mad( p_mad );
        AL_EXIT( AL_DBG_CM );
        return;
    }

    cm_res_acquire( p_conn );
    switch( p_mad->status )
    {
    case IB_WCS_SUCCESS:
    case IB_WCS_CANCELED:
        if( p_conn->state == CM_CONN_REP_SENT )
        {
            /*
             * Free the connection for UD types (SIDR).
             */
            //***TODO: Do something similar with UD in the IB_TIMEOUT case.
            if( p_conn->qp_type == IB_QPT_UNRELIABLE_DGRM )
            {
                /* Move the connection off the pending list. */
                cl_spinlock_acquire( &gp_cm->obj.lock );
                cl_qlist_remove_item( &gp_cm->pending_list,
                    (cl_list_item_t*)&p_conn->map_item );
                cl_spinlock_release( &gp_cm->obj.lock );

                __deref_conn( p_conn );
            }
        }

        /* Release the reference taken when sending. */
        cm_res_release( p_conn );
        __deref_conn( p_conn );
        break;

    case IB_WCS_TIMEOUT_RETRY_ERR:
        /*
         * Timeout.  No response received within the allowable time.  Queue
         * an async item to reject and call the user back.  We continue to
         * hold the reference on p_conn from the send.
         */
        switch( p_conn->state )
        {
        case CM_CONN_REQ_SENT:
        case CM_CONN_REP_SENT:
            p_conn->timeout_item.pfn_callback = __proc_conn_timeout;
            break;
        case CM_CONN_LAP_SENT:
            p_conn->timeout_item.pfn_callback = __proc_lap_timeout;
            break;
        case CM_CONN_DREQ_SENT:
            p_conn->timeout_item.pfn_callback = __proc_dconn_timeout;
            break;
        default:
            p_conn->timeout_item.pfn_callback = NULL;
            break;
        }

        /* Process timeouts asynchronously - queue the callback. */
        if( p_conn->timeout_item.pfn_callback )
            cl_async_proc_queue( gp_async_proc_mgr, &p_conn->timeout_item );

        cm_res_release( p_conn );

        /* Continue to hold the reference if we're processing a timeout. */
        if( !p_conn->timeout_item.pfn_callback )
            __deref_conn( p_conn );
        break;

    default:
        /* Some sort of error. */
        //***TODO: What kind of errors can we get, and how do we handle them?

        /* Release the reference taken when sending. */
        cm_res_release( p_conn );
        __deref_conn( p_conn );
        break;
    }

    __put_av( p_port_cm, p_mad );
    ib_put_mad( p_mad );

    AL_EXIT( AL_DBG_CM );
}
/*
 * Called when the HCA generates the communication established event.
 * This happens when a receive completes while the QP is in the RTR state.
 * For now, we continue to wait until the RTU is received before transitioning
 * the QP.  Doing this is not ideal, but should work in almost all cases, since
 * the REP and RTU will be retried, and it avoids having to deal with
 * unpleasant race conditions.
 */
void
cm_conn_established(
    IN al_conn_t* const p_conn )
{
    /* We ignore the callback since we use RTU as the trigger. */
    UNUSED_PARAM( p_conn );
}
/*
 * Called when the HCA migrates to the alternate path.
 */
void
cm_conn_migrated(
    IN al_conn_t* const p_conn )
{
    AL_ENTER( AL_DBG_CM );

    /* Update the index to the primary path. */
    cm_res_acquire( p_conn );

    /* Increment the index. */
    p_conn->idx_primary++;
    /* Only the lowest bit is valid (0 or 1), so the index simply alternates. */
    p_conn->idx_primary &= 0x1;

    cm_res_release( p_conn );
    AL_EXIT( AL_DBG_CM );
}
/*
 * Called when the HCA generates a communication established or APM event.
 */
void
cm_async_event_cb(
    IN const ib_async_event_rec_t* const p_event_rec )
{
    al_conn_qp_t* p_qp;

    CL_ASSERT( p_event_rec );

    p_qp = (al_conn_qp_t* __ptr64)p_event_rec->context;

    /*
     * Make sure to check that the QP is still connected by verifying
     * that we still reference a p_conn structure.  The QP may be being
     * destroyed while this event is reported.
     */
    switch( p_event_rec->code )
    {
    case IB_AE_QP_COMM:
        if( !p_qp->p_conn )
            break;
        cm_conn_established( p_qp->p_conn );
        break;

    case IB_AE_QP_APM:
        if( !p_qp->p_conn )
            break;
        cm_conn_migrated( p_qp->p_conn );
        break;

    case IB_AE_QP_APM_ERROR:
        //***TODO: Figure out how to handle these errors.
        break;

    default:
        break;
    }
}
static void
__cm_qp_event_cb(
    IN ib_async_event_rec_t *p_event_rec )
{
    UNUSED_PARAM( p_event_rec );

    /*
     * Most of the QP events are trapped by the real owner of the QP.
     * For real events, the CM may not be able to do much anyway.
     */
}
/***static***/ ib_api_status_t
__init_data_svc(
    IN cm_port_agent_t* const p_port_cm,
    IN const ib_port_attr_t* const p_port_attr )
{
    ib_api_status_t status;
    cl_status_t cl_status;
    ib_qp_create_t qp_create;
    ib_mad_svc_t mad_svc;

    AL_ENTER( AL_DBG_CM );

    cl_status = cl_spinlock_init( &p_port_cm->lock );
    if( cl_status != CL_SUCCESS )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cl_spinlock_init failed\n") );
        return ib_convert_cl_status( cl_status );
    }

    /*
     * Create the PD alias.  We use the port CM's al_obj_t as the context
     * to allow using deref_al_obj as the destroy callback.
     */
    status = ib_alloc_pd( p_port_cm->h_ca, IB_PDT_ALIAS, &p_port_cm->obj,
        &p_port_cm->h_pd );
    if( status != IB_SUCCESS )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_alloc_pd failed with status %s\n", ib_get_err_str(status)) );
        return status;
    }
    /* Reference the port object on behalf of the PD. */
    ref_al_obj( &p_port_cm->obj );

    /* Create the MAD QP. */
    cl_memclr( &qp_create, sizeof( ib_qp_create_t ) );
    qp_create.qp_type = IB_QPT_QP1_ALIAS;
    qp_create.rq_depth = CM_MAD_RQ_DEPTH;
    qp_create.sq_depth = CM_MAD_SQ_DEPTH;
    qp_create.rq_sge = CM_MAD_RQ_SGE;
    qp_create.sq_sge = CM_MAD_SQ_SGE;
    qp_create.sq_signaled = TRUE;
    /*
     * We use the port CM's al_obj_t as the context to allow using
     * deref_al_obj as the destroy callback.
     */
    status = ib_get_spl_qp( p_port_cm->h_pd, p_port_attr->port_guid,
        &qp_create, &p_port_cm->obj, __cm_qp_event_cb, &p_port_cm->pool_key,
        &p_port_cm->h_qp );
    if( status != IB_SUCCESS )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_get_spl_qp failed with status %s\n", ib_get_err_str(status)) );
        return status;
    }
    /* Reference the port object on behalf of the QP. */
    ref_al_obj( &p_port_cm->obj );

    /* Create the MAD service. */
    cl_memclr( &mad_svc, sizeof(mad_svc) );
    mad_svc.mad_svc_context = p_port_cm;
    mad_svc.pfn_mad_recv_cb = __cm_mad_recv_cb;
    mad_svc.pfn_mad_send_cb = __cm_mad_send_cb;
    mad_svc.support_unsol = TRUE;
    mad_svc.mgmt_class = IB_MCLASS_COMM_MGMT;
    mad_svc.mgmt_version = IB_MCLASS_CM_VER_2;
    mad_svc.method_array[IB_MAD_METHOD_SEND] = TRUE;

    status = ib_reg_mad_svc( p_port_cm->h_qp, &mad_svc, &p_port_cm->h_mad_svc );
    if( status != IB_SUCCESS )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_reg_mad_svc failed with status %s\n", ib_get_err_str(status)) );
        return status;
    }

    AL_EXIT( AL_DBG_CM );
    return IB_SUCCESS;
}
/*
 * Create a port agent for a given port.
 */
/***static***/ ib_api_status_t
__create_port_cm(
    IN ib_pnp_port_rec_t *p_pnp_rec )
{
    cm_port_agent_t *p_port_cm;
    ib_api_status_t status;
    ib_port_attr_mod_t port_attr_mod;

    AL_ENTER( AL_DBG_CM );

    /* calculate size of port_cm struct */
    p_port_cm = (cm_port_agent_t*)cl_zalloc( sizeof(cm_port_agent_t) +
        p_pnp_rec->p_ca_attr->size );
    if( !p_port_cm )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("Failed to cl_zalloc port CM agent.\n") );
        return IB_INSUFFICIENT_MEMORY;
    }

    construct_al_obj( &p_port_cm->obj, AL_OBJ_TYPE_CM );
    cl_qlist_init( &p_port_cm->av_list );
    cl_spinlock_construct( &p_port_cm->lock );

    status = init_al_obj( &p_port_cm->obj, p_port_cm, TRUE,
        __destroying_port_cm, NULL, __free_port_cm );
    if( status != IB_SUCCESS )
    {
        __free_port_cm( &p_port_cm->obj );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("init_al_obj failed with status %s.\n", ib_get_err_str(status)) );
        return status;
    }

    /* Attach to the global CM object. */
    status = attach_al_obj( &gp_cm->obj, &p_port_cm->obj );
    if( status != IB_SUCCESS )
    {
        p_port_cm->obj.pfn_destroy( &p_port_cm->obj, NULL );
        AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("attach_al_obj returned %s.\n", ib_get_err_str(status)) );
        return status;
    }

    p_port_cm->port_idx =
        (uint8_t)(p_pnp_rec->p_port_attr->port_num - 1);

    /* cache required port attributes */
    p_port_cm->p_ca_attr = (ib_ca_attr_t*)
        (((uint8_t*)p_port_cm) + sizeof(cm_port_agent_t));

    ib_copy_ca_attr( p_port_cm->p_ca_attr, p_pnp_rec->p_ca_attr );

    /* Get a reference to the CA on which we are loading. */
    p_port_cm->h_ca = acquire_ca( p_pnp_rec->p_ca_attr->ca_guid );
    if( !p_port_cm->h_ca )
    {
        p_port_cm->obj.pfn_destroy( &p_port_cm->obj, NULL );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("acquire_ca failed.\n") );
        return IB_INVALID_GUID;
    }

    status = __init_data_svc( p_port_cm, p_pnp_rec->p_port_attr );
    if( status != IB_SUCCESS )
    {
        p_port_cm->obj.pfn_destroy( &p_port_cm->obj, NULL );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("__init_data_svc failed with status %s.\n",
            ib_get_err_str(status)) );
        return status;
    }

    /* Update local port attributes. */
    cl_memclr( &port_attr_mod, sizeof(ib_port_attr_mod_t) );
    port_attr_mod.cap.cm = TRUE;
    status = ib_modify_ca( p_port_cm->h_ca, p_pnp_rec->p_port_attr->port_num,
        IB_CA_MOD_IS_CM_SUPPORTED, &port_attr_mod );

    /* Update the PnP context to reference this port. */
    p_pnp_rec->pnp_rec.context = p_port_cm;

    /* Release the reference taken in init_al_obj. */
    deref_al_obj( &p_port_cm->obj );

    AL_EXIT( AL_DBG_CM );
    return IB_SUCCESS;
}
static void
__refresh_port_cm(
    IN ib_pnp_port_rec_t *p_pnp_rec )
{
    cm_port_agent_t *p_port_cm;

    CL_ASSERT( p_pnp_rec->pnp_rec.context );

    p_port_cm = (cm_port_agent_t* __ptr64)p_pnp_rec->pnp_rec.context;

    if( p_pnp_rec->p_ca_attr )
    {
        CL_ASSERT( p_port_cm->p_ca_attr->size == p_pnp_rec->p_ca_attr->size );
        ib_copy_ca_attr( p_port_cm->p_ca_attr, p_pnp_rec->p_ca_attr );
    }
}
/*
 * PnP callback for port event notifications.
 */
/***static***/ ib_api_status_t
__cm_pnp_cb(
    IN ib_pnp_rec_t *p_pnp_rec )
{
    ib_api_status_t status = IB_SUCCESS;

    AL_ENTER( AL_DBG_CM );

    switch( p_pnp_rec->pnp_event )
    {
    case IB_PNP_PORT_ADD:
        /* Create the port agent. */
        CL_ASSERT( !p_pnp_rec->context );
        status = __create_port_cm( (ib_pnp_port_rec_t*)p_pnp_rec );
        break;

    case IB_PNP_PORT_ACTIVE:
        /* Refresh the cache. */
        __refresh_port_cm( (ib_pnp_port_rec_t*)p_pnp_rec );
        break;

    case IB_PNP_PORT_REMOVE:
        CL_ASSERT( p_pnp_rec->context );

        /* Destroy the port agent. */
        ref_al_obj( &((cm_port_agent_t* __ptr64)p_pnp_rec->context)->obj );
        ((cm_port_agent_t* __ptr64)p_pnp_rec->context)->obj.pfn_destroy(
            &((cm_port_agent_t* __ptr64)p_pnp_rec->context)->obj, NULL );

        /* Fall through to validate the listen requests. */

    case IB_PNP_PORT_DOWN:
    case IB_PNP_PKEY_CHANGE:
    case IB_PNP_GID_CHANGE:
    case IB_PNP_LID_CHANGE:
        /*
         * Validate listen requests.  Due to the potential lack of port
         * attributes (when a port is removed), it is not possible to
         * invalidate requests.  Instead, we validate all requests and
         * fail any that are not valid on any existing port.
         */
        __refresh_port_cm( (ib_pnp_port_rec_t*)p_pnp_rec );
        __validate_listens();
        break;

    default:
        break;		/* Ignore other PnP events. */
    }

    AL_EXIT( AL_DBG_CM );
    return status;
}
/*
 * Timer async proc routine that locks and does the job.
 */
static void
__process_cm_timer(
    IN cl_async_proc_item_t *p_item )
{
    //*** TODO: Timer callback must handle retries, timeouts, and time wait.
    al_conn_t *p_conn;
    cm_async_timer_t *p_async_timer;

    AL_ENTER( AL_DBG_CM );

    p_async_timer = PARENT_STRUCT( p_item, cm_async_timer_t, item );
    p_conn = p_async_timer->p_conn;

    cm_res_acquire( p_conn );
    switch( p_conn->state )
    {
    case CM_CONN_REQ_MRA_RCVD:
    case CM_CONN_REP_MRA_RCVD:
    {
        /*
         * Format the reject record before aborting the connection since
         * we need the QP context.
         */
        ib_cm_rej_rec_t rej_rec;
        cl_memclr( &rej_rec, sizeof( ib_cm_rej_rec_t ) );
        rej_rec.h_qp = p_conn->h_qp;
        rej_rec.qp_context = p_conn->h_qp->obj.context;
        rej_rec.rej_status = IB_REJ_TIMEOUT;

        /* Report the timeout and send the REJ. */
        __conn_reject( p_conn, IB_REJ_TIMEOUT, NULL, 0, NULL );
        cm_res_release( p_conn );

        p_conn->pfn_cm_rej_cb( &rej_rec );
        break;
    }

    case CM_CONN_LAP_MRA_RCVD:
    {
        /* Report the timeout. */
        ib_cm_apr_rec_t apr_rec;
        cl_memclr( &apr_rec, sizeof( ib_cm_apr_rec_t ) );
        apr_rec.h_qp = p_conn->h_qp;
        apr_rec.qp_context = p_conn->h_qp->obj.context;
        apr_rec.cm_status = IB_TIMEOUT;
        apr_rec.apr_status = IB_AP_REJECT;

        /* Return to the established state. */
        p_conn->state = CM_CONN_ESTABLISHED;
        cm_res_release( p_conn );

        /* Notify the user that the LAP failed. */
        p_conn->pfn_cm_apr_cb( &apr_rec );
        break;
    }

    //***TODO: Check synchronization issues relating to the timer being part
    //***TODO: of the connection object.  Only timewait has potential issues.
    //***TODO: If, when destroying all connections in the timewait state, the
    //***TODO: timers are trimmed to zero, the destruction should be able to
    //***TODO: wait until all timers expire.
    case CM_CONN_TIMEWAIT:
        p_conn->state = CM_CONN_RESET;
        cl_spinlock_acquire( &gp_cm->obj.lock );
        cl_qlist_remove_item( &gp_cm->time_wait_list,
            (cl_list_item_t*)&p_conn->map_item );
        cl_spinlock_release( &gp_cm->obj.lock );
        cm_res_release( p_conn );
        break;

    default:
        /*
         * No assert here, since the state could change just after the timer
         * expires but before the callback is invoked.
         */
        cm_res_release( p_conn );
        break;
    }

    /* Release the connection. */
    __deref_conn( p_conn );
    cl_free( p_async_timer );

    AL_EXIT( AL_DBG_CM );
}
/*
 * Timer callback for taking connections out of the time wait state.
 */
/***static***/ void
__conn_timer_cb(
    IN void *context )
{
    cm_async_timer_t *p_async_timer;

    AL_ENTER( AL_DBG_CM );

    p_async_timer = (cm_async_timer_t*)cl_zalloc( sizeof(cm_async_timer_t) );
    if( !p_async_timer )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("failed to cl_zalloc cm_async_timer_t (%d bytes). System unstable!\n",
            sizeof(cm_async_timer_t)) );
        /* Bah - just release the connection now. */
        __deref_conn( (al_conn_t*)context );
        return;
    }

    /*
     * Queue an async item for further processing.  We are still holding a
     * reference on the gp_cm.
     */
    p_async_timer->p_conn = (al_conn_t*)context;
    p_async_timer->item.pfn_callback = __process_cm_timer;
    cl_async_proc_queue( gp_async_proc_mgr, &p_async_timer->item );

    AL_EXIT( AL_DBG_CM );
}
/*
 * Constructor for connection objects, called by the qpool when allocating new
 * connection objects.
 */
/***static***/ cl_status_t
__conn_ctor(
    IN void* const p_object,
    IN void* context,
    OUT cl_pool_item_t** const pp_pool_item )
{
    cl_status_t cl_status;
    al_conn_t* p_conn = (al_conn_t*)p_object;

    UNUSED_PARAM( context );

    cl_status = cm_res_init( p_conn );
    if( cl_status != CL_SUCCESS )
    {
        /*AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cm_res_init failed with status %s.\n",
            CL_STATUS_MSG( cl_status )) );*/
        return cl_status;
    }

    cl_status = cl_timer_init( &p_conn->timer, __conn_timer_cb, p_conn );
    if( cl_status != CL_SUCCESS )
    {
        /*AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cl_timer_init failed with status %s.\n",
            CL_STATUS_MSG( cl_status )) );*/
        cm_res_destroy( p_conn );
        return cl_status;
    }

    p_conn->state = CM_CONN_RESET;
    p_conn->local_comm_id = (net32_t)cl_atomic_inc( &local_comm_id );
    /* Preset the top byte of the communication ID. */
    p_conn->local_comm_id |= (net32_t)(cl_get_time_stamp() << 24);

    *pp_pool_item = (cl_pool_item_t*)&p_conn->map_item;

    return CL_SUCCESS;
}
/*
 * Destructor for connection objects, called by the qpool when freeing memory
 * allocated for a connection object.
 */
/***static***/ void
__conn_dtor(
    IN const cl_pool_item_t* const p_pool_item,
    IN void* context )
{
    al_conn_t *p_conn;

    UNUSED_PARAM( context );

    p_conn = PARENT_STRUCT( p_pool_item, al_conn_t, map_item );

    /* Destroy the timer. */
    cl_timer_destroy( &p_conn->timer );

    /* Destroy the lock/mutex. */
    cm_res_destroy( p_conn );
}
/*
 * Allocates and initializes the global CM agent.
 */
    IN al_obj_t* const p_parent_obj )
{
    ib_api_status_t status;
    cl_status_t cl_status;
    ib_pnp_req_t pnp_req;

    AL_ENTER( AL_DBG_CM );

    CL_ASSERT( gp_cm == NULL );

    /* Allocate the global CM agent. */
    gp_cm = (al_cm_agent_t*)cl_zalloc( sizeof(al_cm_agent_t) );
    if( !gp_cm )
    {
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("Failed allocation of global CM agent.\n") );
        return IB_INSUFFICIENT_MEMORY;
    }

    construct_al_obj( &gp_cm->obj, AL_OBJ_TYPE_CM );
    cl_qpool_construct( &gp_cm->conn_pool );
    cl_pool_construct( &gp_cm->req_pool );
    cl_qlist_init( &gp_cm->active_listen_list );
    cl_qlist_init( &gp_cm->inactive_listen_list );
    cl_qlist_init( &gp_cm->pending_list );
    cl_qmap_init( &gp_cm->conn_map );
    cl_qlist_init( &gp_cm->time_wait_list );

    status = init_al_obj( &gp_cm->obj, NULL, TRUE, __destroying_cm, NULL,
        __free_cm );
    if( status != IB_SUCCESS )
    {
        __free_cm( &gp_cm->obj );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("init_al_obj failed with status %s.\n", ib_get_err_str(status)) );
        return status;
    }

    /* Attach to the parent object. */
    status = attach_al_obj( p_parent_obj, &gp_cm->obj );
    if( status != IB_SUCCESS )
    {
        gp_cm->obj.pfn_destroy( &gp_cm->obj, NULL );
        AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("attach_al_obj returned %s.\n", ib_get_err_str(status)) );
        return status;
    }

    cl_status = cl_qpool_init( &gp_cm->conn_pool,
        CM_CONN_POOL_MIN, CM_CONN_POOL_MAX, CM_CONN_POOL_GROW,
        sizeof(al_conn_t), __conn_ctor, __conn_dtor, NULL );
    if( cl_status != CL_SUCCESS )
    {
        gp_cm->obj.pfn_destroy( &gp_cm->obj, NULL );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cl_qpool_init failed with status %s.\n",
            CL_STATUS_MSG(cl_status)) );
        return ib_convert_cl_status( cl_status );
    }

    cl_status = cl_pool_init( &gp_cm->req_pool, CM_REQ_POOL_MIN, 0,
        CM_REQ_POOL_GROW, sizeof(conn_req_t), NULL, NULL, NULL );
    if( cl_status != CL_SUCCESS )
    {
        gp_cm->obj.pfn_destroy( &gp_cm->obj, NULL );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("cl_pool_init failed with status %s.\n",
            CL_STATUS_MSG(cl_status)) );
        return ib_convert_cl_status( cl_status );
    }

    /* Register for port PnP notifications. */
    cl_memclr( &pnp_req, sizeof(pnp_req) );
    pnp_req.pnp_class = IB_PNP_PORT;
    pnp_req.pfn_pnp_cb = __cm_pnp_cb;
    status = ib_reg_pnp( gh_al, &pnp_req, &gp_cm->h_pnp );
    if( status != IB_SUCCESS )
    {
        gp_cm->obj.pfn_destroy( &gp_cm->obj, NULL );
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
            ("ib_reg_pnp failed with status %s.\n", ib_get_err_str(status)) );
        return status;
    }

    /*
     * Leave the reference taken in init_al_obj outstanding since PnP
     * deregistration is asynchronous.  This replaces a call to ref and
     * deref the object.
     */

    AL_EXIT( AL_DBG_CM );
    return IB_SUCCESS;
}
    IN const ib_gid_t* const p_gid,
    IN const ib_net16_t lid,
    OUT cm_port_agent_t** const pp_port_cm OPTIONAL,
    OUT ib_ca_attr_t** const pp_ca_attr OPTIONAL )
{
    cl_list_item_t *p_item;
    cm_port_agent_t *p_port_cm;
    ib_api_status_t status = IB_INVALID_SETTING;
    ib_ca_attr_t *p_ca_attr;
    ib_port_attr_t *p_port_attr;
    uint16_t gid_idx;

    AL_ENTER( AL_DBG_CM );

    cl_spinlock_acquire( &gp_cm->obj.lock );
    for( p_item = cl_qlist_head( &gp_cm->obj.obj_list );
        p_item != cl_qlist_end( &gp_cm->obj.obj_list );
        p_item = cl_qlist_next( p_item ) )
    {
        p_port_cm = PARENT_STRUCT(
            PARENT_STRUCT( p_item, al_obj_t, pool_item ),
            cm_port_agent_t, obj );

        p_ca_attr = p_port_cm->p_ca_attr;

        /* Shortcut to the port attributes for cleaner code. */
        p_port_attr = &p_ca_attr->p_port_attr[p_port_cm->port_idx];

        for( gid_idx = 0; gid_idx < p_port_attr->num_gids; gid_idx++ )
        {
            if( cl_memcmp( &p_port_attr->p_gid_table[gid_idx],
                p_gid, sizeof(ib_gid_t) ) )
            {
                continue;
            }

            /* Found a GID match.  Look for a LID match. */
            if( __is_lid_valid( lid, p_port_attr->lid,
                p_port_attr->lmc ) != TRUE )
            {
                continue;
            }

            /* Chaa-ching!  We have a winner! */
            cl_spinlock_release( &gp_cm->obj.lock );

            if( pp_ca_attr )
                *pp_ca_attr = p_ca_attr;

            if( pp_port_cm )
                *pp_port_cm = p_port_cm;

            AL_EXIT( AL_DBG_CM );
            return IB_SUCCESS;
        }

        /* No match.  Reset the port CM pointer. */
        status = IB_INVALID_SETTING;
    }
    cl_spinlock_release( &gp_cm->obj.lock );

    AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
        ("No match found.\n") );