In osm_ucast_mgr, where osm_req_set is called for SwitchInfo,
[mirror/winof/.git] / ulp / opensm / user / opensm / osm_mcast_mgr.c
1 /*
2  * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  *
6  * This software is available to you under the OpenIB.org BSD license
7  * below:
8  *
9  *     Redistribution and use in source and binary forms, with or
10  *     without modification, are permitted provided that the following
11  *     conditions are met:
12  *
13  *      - Redistributions of source code must retain the above
14  *        copyright notice, this list of conditions and the following
15  *        disclaimer.
16  *
17  *      - Redistributions in binary form must reproduce the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer in the documentation and/or other materials
20  *        provided with the distribution.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
26  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
28  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29  * SOFTWARE.
30  *
31  * $Id$
32  */
33
34
35 /*
36  * Abstract:
37  *    Implementation of osm_mcast_mgr_t.
38  * This file implements the Multicast Manager object.
39  *
40  * Environment:
41  *    Linux User Mode
42  *
43  * $Revision: 1.9 $
44  */
45
46 #if HAVE_CONFIG_H
47 #  include <config.h>
48 #endif /* HAVE_CONFIG_H */
49
50 #include <iba/ib_types.h>
51 #include <complib/cl_memory.h>
52 #include <complib/cl_debug.h>
53 #include <opensm/osm_mcast_mgr.h>
54 #include <opensm/osm_multicast.h>
55 #include <opensm/osm_node.h>
56 #include <opensm/osm_switch.h>
57 #include <opensm/osm_helper.h>
58 #include <opensm/osm_msgdef.h>
59
60 #define LINE_LENGTH 256
61
62 /**********************************************************************
63  **********************************************************************/
64 typedef struct _osm_mcast_work_obj
65 {
66   cl_list_item_t     list_item;
67   osm_port_t*        p_port;
68
69 } osm_mcast_work_obj_t;
70
71 /**********************************************************************
72  **********************************************************************/
73 static osm_mcast_work_obj_t*
74 __osm_mcast_work_obj_new(
75   IN const osm_port_t*     const p_port )
76 {
77   /*
78     TO DO - get these objects from a lockpool.
79   */
80   osm_mcast_work_obj_t*    p_obj;
81
82   /*
83     clean allocated memory to avoid assertion when trying to insert to
84     qlist.
85     see cl_qlist_insert_tail(): CL_ASSERT(p_list_item->p_list != p_list)
86   */
87   p_obj = cl_zalloc( sizeof( *p_obj ) );
88   if( p_obj )
89     p_obj->p_port = (osm_port_t*)p_port;
90
91   return( p_obj );
92 }
93
94 /**********************************************************************
95  **********************************************************************/
96 static void
97 __osm_mcast_work_obj_delete(
98   IN osm_mcast_work_obj_t* p_wobj )
99 {
100   cl_free( p_wobj );
101 }
102
103 /**********************************************************************
104    Recursively remove nodes from the tree
105 **********************************************************************/
106 void
107 __osm_mcast_mgr_purge_tree_node(
108   IN osm_mtree_node_t*     p_mtn )
109 {
110   uint8_t                  i;
111
112   for( i = 0; i < p_mtn->max_children; i++ )
113   {
114     if( p_mtn->child_array[i] &&
115         (p_mtn->child_array[i] != OSM_MTREE_LEAF) )
116       __osm_mcast_mgr_purge_tree_node( p_mtn->child_array[i] );
117
118     p_mtn->child_array[i] = NULL;
119
120   }
121
122   cl_free( p_mtn );
123 }
124
125 /**********************************************************************
126  **********************************************************************/
127 static void
128 __osm_mcast_mgr_purge_tree(
129   IN osm_mcast_mgr_t*         const p_mgr,
130   IN osm_mgrp_t*           const p_mgrp )
131 {
132   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_purge_tree );
133
134   if( p_mgrp->p_root )
135     __osm_mcast_mgr_purge_tree_node( p_mgrp->p_root );
136
137   p_mgrp->p_root = NULL;
138
139   OSM_LOG_EXIT( p_mgr->p_log );
140 }
141
142 /**********************************************************************
143  **********************************************************************/
144 uint32_t
145 osm_mcast_mgr_compute_avg_hops(
146   osm_mcast_mgr_t*         const p_mgr,
147   const osm_mgrp_t*        const p_mgrp,
148   const osm_switch_t*         const p_sw )
149 {
150   uint32_t avg_hops = 0;
151   uint32_t hops = 0;
152   uint32_t num_ports = 0;
153   uint16_t base_lid_ho;
154   const osm_port_t* p_port;
155   const osm_mcm_port_t* p_mcm_port;
156   const cl_qmap_t* p_mcm_tbl;
157   const cl_qmap_t* p_port_tbl;
158
159   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_compute_avg_hops );
160
161   p_mcm_tbl = &p_mgrp->mcm_port_tbl;
162   p_port_tbl = &p_mgr->p_subn->port_guid_tbl;
163
164   /*
165     For each member of the multicast group, compute the
166     number of hops to its base LID.
167   */
168   for( p_mcm_port = (osm_mcm_port_t*)cl_qmap_head( p_mcm_tbl );
169        p_mcm_port != (osm_mcm_port_t*)cl_qmap_end( p_mcm_tbl );
170        p_mcm_port = (osm_mcm_port_t*)cl_qmap_next(&p_mcm_port->map_item))
171   {
172     /*
173       Acquire the port object for this port guid, then create
174       the new worker object to build the list.
175     */
176     p_port = (osm_port_t*)cl_qmap_get( p_port_tbl,
177                                        ib_gid_get_guid( &p_mcm_port->port_gid ) );
178
179     if( p_port == (osm_port_t*)cl_qmap_end( p_port_tbl ) )
180     {
181       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
182                "osm_mcast_mgr_compute_avg_hops: ERR 0A18: "
183                "No port object for port 0x%016" PRIx64 ".\n",
184                cl_ntoh64( ib_gid_get_guid( &p_mcm_port->port_gid ) ) );
185       continue;
186     }
187
188     base_lid_ho = cl_ntoh16( osm_port_get_base_lid( p_port ) );
189     hops += osm_switch_get_least_hops( p_sw, base_lid_ho );
190     num_ports++;
191   }
192
193   /*
194     We should be here if there aren't any ports in the group.
195   */
196   CL_ASSERT( num_ports );
197
198   if( num_ports != 0 )
199   {
200     avg_hops = hops / num_ports;
201   }
202
203   OSM_LOG_EXIT( p_mgr->p_log );
204   return( avg_hops );
205 }
206
207 /**********************************************************************
208  Calculate the maximal "min hops" from the given switch to any
209  of the group HCAs
210  **********************************************************************/
211 uint32_t
212 osm_mcast_mgr_compute_max_hops(
213   osm_mcast_mgr_t*         const p_mgr,
214   const osm_mgrp_t*        const p_mgrp,
215   const osm_switch_t*      const p_sw )
216 {
217   uint32_t max_hops = 0;
218   uint32_t hops = 0;
219   uint16_t base_lid_ho;
220   const osm_port_t* p_port;
221   const osm_mcm_port_t* p_mcm_port;
222   const cl_qmap_t* p_mcm_tbl;
223   const cl_qmap_t* p_port_tbl;
224
225   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_compute_max_hops );
226
227   p_mcm_tbl = &p_mgrp->mcm_port_tbl;
228   p_port_tbl = &p_mgr->p_subn->port_guid_tbl;
229
230   /*
231     For each member of the multicast group, compute the
232     number of hops to its base LID.
233   */
234   for( p_mcm_port = (osm_mcm_port_t*)cl_qmap_head( p_mcm_tbl );
235        p_mcm_port != (osm_mcm_port_t*)cl_qmap_end( p_mcm_tbl );
236        p_mcm_port = (osm_mcm_port_t*)cl_qmap_next(&p_mcm_port->map_item))
237   {
238     /*
239       Acquire the port object for this port guid, then create
240       the new worker object to build the list.
241     */
242     p_port = (osm_port_t*)cl_qmap_get(
243       p_port_tbl,
244       ib_gid_get_guid( &p_mcm_port->port_gid ) );
245
246     if( p_port == (osm_port_t*)cl_qmap_end( p_port_tbl ) )
247     {
248       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
249                "osm_mcast_mgr_compute_max_hops: ERR 0A18: "
250                "No port object for port 0x%016" PRIx64 ".\n",
251                cl_ntoh64( ib_gid_get_guid( &p_mcm_port->port_gid ) ) );
252       continue;
253     }
254
255     base_lid_ho = cl_ntoh16( osm_port_get_base_lid( p_port ) );
256     hops = osm_switch_get_least_hops( p_sw, base_lid_ho );
257     if (hops > max_hops) max_hops = hops;
258   }
259
260   if( max_hops == 0 )
261   {
262     /*
263       We should be here if there aren't any ports in the group.
264     */
265     max_hops = 10001; /* see later - we use it to realize no hops */
266   }
267
268   OSM_LOG_EXIT( p_mgr->p_log );
269   return( max_hops );
270 }
271
272 /**********************************************************************
273    This function attempts to locate the optimal switch for the
274    center of the spanning tree.  The current algorithm chooses
275    a switch with the lowest average hop count to the members
276    of the multicast group.
277 **********************************************************************/
278 static osm_switch_t*
279 __osm_mcast_mgr_find_optimal_switch(
280   osm_mcast_mgr_t*         const p_mgr,
281   const osm_mgrp_t*        const p_mgrp )
282 {
283   cl_qmap_t*               p_sw_tbl;
284   const osm_switch_t*         p_sw;
285   const osm_switch_t*         p_best_sw = NULL;
286   uint32_t                 hops = 0;
287   uint32_t                 best_hops = 10000;   /* any big # will do */
288   uint64_t              sw_guid_ho;
289 #ifdef OSM_VENDOR_INTF_ANAFA  
290         boolean_t             use_avg_hops = TRUE; /* anafa2 - bug hca on switch */ /* use max hops for root */
291 #else
292         boolean_t             use_avg_hops = FALSE;  /* use max hops for root */
293 #endif
294
295   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_find_optimal_switch );
296
297   p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
298
299   CL_ASSERT( !osm_mgrp_is_empty( p_mgrp ) );
300
301   for( p_sw = (osm_switch_t*)cl_qmap_head( p_sw_tbl );
302        p_sw != (osm_switch_t*)cl_qmap_end( p_sw_tbl );
303        p_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item ) )
304   {
305     if( !osm_switch_supports_mcast( p_sw ) )
306       continue;
307
308     if (use_avg_hops) 
309       hops = osm_mcast_mgr_compute_avg_hops( p_mgr, p_mgrp, p_sw );
310     else
311       hops = osm_mcast_mgr_compute_max_hops( p_mgr, p_mgrp, p_sw );
312
313     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
314     {
315       sw_guid_ho = cl_ntoh64( osm_node_get_node_guid(
316                                 osm_switch_get_node_ptr( p_sw ) ) );
317
318       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
319                "__osm_mcast_mgr_find_optimal_switch: "
320                "Switch 0x%016" PRIx64 ", hops = %f.\n",
321                sw_guid_ho, hops );
322     }
323
324     if( hops < best_hops )
325     {
326       p_best_sw = p_sw;
327       best_hops = hops;
328     }
329   }
330
331   if( osm_log_is_active( p_mgr->p_log, OSM_LOG_VERBOSE ) )
332   {
333     if( p_best_sw )
334     {
335       sw_guid_ho = cl_ntoh64( osm_node_get_node_guid(
336                                 osm_switch_get_node_ptr( p_best_sw ) ) );
337
338       osm_log( p_mgr->p_log, OSM_LOG_VERBOSE,
339                "__osm_mcast_mgr_find_optimal_switch: "
340                "Best switch is 0x%" PRIx64 ", hops = %f.\n",
341                sw_guid_ho, best_hops );
342     }
343     else
344     {
345       osm_log( p_mgr->p_log, OSM_LOG_VERBOSE,
346                "__osm_mcast_mgr_find_optimal_switch: "
347                "No multicast capable switches detected.\n" );
348     }
349   }
350
351   OSM_LOG_EXIT( p_mgr->p_log );
352   return( (osm_switch_t*)p_best_sw );
353 }
354
355 /**********************************************************************
356    This function returns the existing or optimal root switch for the tree.
357 **********************************************************************/
358 static osm_switch_t*
359 __osm_mcast_mgr_find_root_switch(
360   osm_mcast_mgr_t* const p_mgr,
361   const osm_mgrp_t* const p_mgrp )
362 {
363   const osm_switch_t* p_sw = NULL;
364
365   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_find_root_switch );
366
367   /*
368     We always look for the best multicast tree root switch.
369     Otherwise since we always start with a a single join
370     the root will be always on the first switch attached to it.
371     - Very bad ...
372   */
373   p_sw = __osm_mcast_mgr_find_optimal_switch( p_mgr, p_mgrp );
374
375   OSM_LOG_EXIT( p_mgr->p_log );
376   return( (osm_switch_t*)p_sw );
377 }
378
379 /**********************************************************************
380  **********************************************************************/
381 void
382 osm_mcast_mgr_construct(
383   IN osm_mcast_mgr_t* const p_mgr )
384 {
385   cl_memclr( p_mgr, sizeof(*p_mgr) );
386 }
387
388 /**********************************************************************
389  **********************************************************************/
390 void
391 osm_mcast_mgr_destroy(
392   IN osm_mcast_mgr_t* const p_mgr )
393 {
394   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_destroy );
395
396   CL_ASSERT( p_mgr );
397   OSM_LOG_EXIT( p_mgr->p_log );
398 }
399
400 /**********************************************************************
401  **********************************************************************/
402 ib_api_status_t
403 osm_mcast_mgr_init(
404   IN osm_mcast_mgr_t*         const p_mgr,
405   IN osm_req_t*            const p_req,
406   IN osm_subn_t*           const p_subn,
407   IN osm_log_t*            const p_log,
408   IN cl_plock_t*           const p_lock )
409 {
410   ib_api_status_t          status = IB_SUCCESS;
411
412   OSM_LOG_ENTER( p_log, osm_mcast_mgr_init );
413
414   CL_ASSERT( p_req );
415   CL_ASSERT( p_subn );
416   CL_ASSERT( p_lock );
417
418   osm_mcast_mgr_construct( p_mgr );
419
420   p_mgr->p_log = p_log;
421   p_mgr->p_subn = p_subn;
422   p_mgr->p_lock = p_lock;
423   p_mgr->p_req = p_req;
424
425   OSM_LOG_EXIT( p_mgr->p_log );
426   return( status );
427 }
428
429 /**********************************************************************
430  **********************************************************************/
431 static osm_signal_t
432 __osm_mcast_mgr_set_tbl(
433   IN osm_mcast_mgr_t*         const p_mgr,
434   IN osm_switch_t*         const p_sw )
435 {
436   osm_node_t*              p_node;
437   osm_dr_path_t*           p_path;
438   osm_madw_context_t       mad_context;
439   ib_api_status_t          status;
440   uint32_t              block_id_ho = 0;
441   int16_t                  block_num = 0;
442   uint32_t              position = 0;
443   uint32_t              max_position;
444   osm_mcast_tbl_t*         p_tbl;
445   ib_net16_t               block[IB_MCAST_BLOCK_SIZE];
446   osm_signal_t          signal = OSM_SIGNAL_DONE;
447
448   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_set_tbl );
449
450   CL_ASSERT( p_mgr );
451   CL_ASSERT( p_sw );
452
453   p_node = osm_switch_get_node_ptr( p_sw );
454
455   CL_ASSERT( p_node );
456
457   p_path = osm_node_get_any_dr_path_ptr( p_node );
458
459   CL_ASSERT( p_path );
460
461   /*
462     Send multicast forwarding table blocks to the switch
463     as long as the switch indicates it has blocks needing
464     configuration.
465   */
466
467   mad_context.mft_context.node_guid = osm_node_get_node_guid( p_node );
468   mad_context.mft_context.set_method = TRUE;
469
470   p_tbl = osm_switch_get_mcast_tbl_ptr( p_sw );
471   max_position = p_tbl->max_position;
472
473   while( osm_mcast_tbl_get_block( p_tbl, block_num,
474                                   (uint8_t)position, block ) )
475   {
476     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
477     {
478       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
479                "__osm_mcast_mgr_set_tbl: "
480                "Writing MFT block 0x%X.\n", block_id_ho );
481     }
482
483     block_id_ho = block_num + (position << 28);
484
485     status = osm_req_set( p_mgr->p_req,
486                           p_path,
487                           (void*)block,
488                           sizeof(block),
489                           IB_MAD_ATTR_MCAST_FWD_TBL,
490                           cl_hton32( block_id_ho ),
491                           CL_DISP_MSGID_NONE,
492                           &mad_context );
493
494     if( status != IB_SUCCESS )
495     {
496       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
497                "__osm_mcast_mgr_set_tbl: ERR 0A02: "
498                "Sending linear fwd. tbl. block failed (%s).\n",
499                ib_get_err_str( status ) );
500     }
501
502     signal = OSM_SIGNAL_DONE_PENDING;
503
504     if( ++position > max_position )
505     {
506       position = 0;
507       block_num++;
508     }
509   }
510
511   OSM_LOG_EXIT( p_mgr->p_log );
512   return( signal );
513 }
514
515 /**********************************************************************
516   This is part of the recursive function to compute the paths in the
517   spanning tree that emanate from this switch.  On input, the p_list
518   contains the group members that must be routed from this switch.
519 **********************************************************************/
520 static void
521 __osm_mcast_mgr_subdivide(
522   osm_mcast_mgr_t*         const p_mgr,
523   osm_mgrp_t*              const p_mgrp,
524   osm_switch_t*            const p_sw,
525   cl_qlist_t*              const p_list,
526   cl_qlist_t*              const list_array,
527   uint8_t                  const array_size )
528 {
529   uint8_t                  port_num;
530   uint16_t              mlid_ho;
531   uint16_t              lid_ho;
532   boolean_t             ignore_existing;
533   osm_mcast_work_obj_t*    p_wobj;
534
535   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_subdivide );
536
537   mlid_ho = cl_ntoh16( osm_mgrp_get_mlid( p_mgrp ) );
538
539   /* 
540      For Multicast Groups we want to not count on previous 
541      configurations - since we can easily generate a storm 
542      by loops.
543   */
544   ignore_existing = TRUE;
545
546   /*
547     Subdivide the set of ports into non-overlapping subsets
548     that will be routed to other switches.
549   */
550   while( (p_wobj = (osm_mcast_work_obj_t*)cl_qlist_remove_head( p_list )) !=
551          (osm_mcast_work_obj_t*)cl_qlist_end( p_list ) )
552   {
553     lid_ho = cl_ntoh16( osm_port_get_base_lid( p_wobj->p_port ) );
554
555     port_num = osm_switch_recommend_mcast_path(
556       p_sw, lid_ho, mlid_ho, ignore_existing );
557
558     if( port_num == OSM_NO_PATH )
559     {
560       /*
561         This typically occurs if the switch does not support
562         multicast and the multicast tree must branch at this
563         switch.
564       */
565       uint64_t node_guid_ho = cl_ntoh64( osm_node_get_node_guid(
566                                            osm_switch_get_node_ptr( p_sw ) ) );
567
568       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
569                "__osm_mcast_mgr_subdivide: ERR 0A03: "
570                "Error routing MLID 0x%X through switch 0x%" PRIx64 ".\n"
571                "\t\t\t\tNo multicast paths from this switch for port "
572                "with LID 0x%X.\n",
573                mlid_ho, node_guid_ho, lid_ho );
574
575       __osm_mcast_work_obj_delete( p_wobj );
576       continue;
577     }
578
579     if( port_num > array_size )
580     {
581       uint64_t node_guid_ho = cl_ntoh64( osm_node_get_node_guid(
582                                            osm_switch_get_node_ptr( p_sw ) ) );
583
584       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
585                "__osm_mcast_mgr_subdivide: ERR 0A04: "
586                "Error routing MLID 0x%X through switch "
587                "0x%" PRIx64 ".\n"
588                "\t\t\t\tNo multicast paths from this switch "
589                "to port with LID 0x%X.\n",
590                mlid_ho, node_guid_ho, lid_ho );
591
592       __osm_mcast_work_obj_delete( p_wobj );
593
594       /* This is means OpenSM has a bug. */
595       CL_ASSERT( FALSE );
596       continue;
597     }
598
599     cl_qlist_insert_tail( &list_array[port_num], &p_wobj->list_item );
600   }
601
602   OSM_LOG_EXIT( p_mgr->p_log );
603 }
604
605 /**********************************************************************
606  **********************************************************************/
607 static void
608 __osm_mcast_mgr_purge_list(
609   osm_mcast_mgr_t*         const p_mgr,
610   cl_qlist_t*              const p_list )
611 {
612   osm_mcast_work_obj_t*    p_wobj;
613
614   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_purge_list );
615
616   while( (p_wobj = (osm_mcast_work_obj_t*)cl_qlist_remove_head( p_list ) )
617          != (osm_mcast_work_obj_t*)cl_qlist_end( p_list ) )
618   {
619     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
620              "__osm_mcast_mgr_purge_list: ERR 0A06: "
621              "Unable to route for port 0x%" PRIx64 ".\n",
622              osm_port_get_guid( p_wobj->p_port ) );
623     __osm_mcast_work_obj_delete( p_wobj );
624   }
625
626   OSM_LOG_EXIT( p_mgr->p_log );
627 }
628
629 /**********************************************************************
630   This is the recursive function to compute the paths in the spanning
631   tree that emanate from this switch.  On input, the p_list contains
632   the group members that must be routed from this switch.
633
634   The function returns the newly created mtree node element.
635 **********************************************************************/
636 static osm_mtree_node_t*
637 __osm_mcast_mgr_branch(
638   osm_mcast_mgr_t*         const p_mgr,
639   osm_mgrp_t*              const p_mgrp,
640   osm_switch_t*            const p_sw,
641   cl_qlist_t*              const p_list,
642   uint8_t                  depth,
643   uint8_t                  const upstream_port,
644   uint8_t*              const p_max_depth )
645 {
646   uint8_t                  max_children;
647   osm_mtree_node_t*        p_mtn = NULL;
648   cl_qlist_t*              list_array = NULL;
649   uint8_t                  i;
650   cl_qmap_t*               p_sw_guid_tbl;
651   ib_net64_t               node_guid;
652   uint64_t              node_guid_ho;
653   osm_mcast_work_obj_t*    p_wobj;
654   cl_qlist_t*              p_port_list;
655   size_t                count;
656   uint16_t              mlid_ho;
657   osm_mcast_tbl_t*         p_tbl;
658
659   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_branch );
660
661   CL_ASSERT( p_sw );
662   CL_ASSERT( p_list );
663   CL_ASSERT( p_max_depth );
664
665   node_guid = osm_node_get_node_guid(  osm_switch_get_node_ptr( p_sw ) );
666   node_guid_ho = cl_ntoh64( node_guid );
667   mlid_ho = cl_ntoh16( osm_mgrp_get_mlid( p_mgrp ) );
668
669   if( osm_log_is_active( p_mgr->p_log, OSM_LOG_VERBOSE ) )
670   {
671     osm_log( p_mgr->p_log, OSM_LOG_VERBOSE,
672              "__osm_mcast_mgr_branch: "
673              "Routing MLID 0x%X through switch 0x%" PRIx64 ".\n"
674              "\t\t\t\t%u nodes at depth %u.\n",
675              mlid_ho,
676              node_guid_ho,
677              cl_qlist_count( p_list ), depth );
678   }
679
680   CL_ASSERT( cl_qlist_count( p_list ) > 0 );
681
682   depth++;
683
684   if( depth > *p_max_depth )
685   {
686     CL_ASSERT( depth == *p_max_depth + 1 );
687     *p_max_depth = depth;
688   }
689
690   if( osm_switch_supports_mcast( p_sw ) == FALSE )
691   {
692     /*
693       This switch doesn't do multicast.  Clean-up.
694     */
695     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
696              "__osm_mcast_mgr_branch: ERR 0A14: "
697              "Switch 0x%" PRIx64 " does not support multicast.\n",
698              node_guid_ho );
699
700     /*
701       Deallocate all the work objects on this branch of the tree.
702     */
703     __osm_mcast_mgr_purge_list( p_mgr, p_list );
704     goto Exit;
705   }
706
707   p_mtn = osm_mtree_node_new( p_sw );
708   if( p_mtn == NULL )
709   {
710     /*
711       We are unable to continue routing down this
712       leg of the tree.  Clean-up.
713     */
714     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
715              "__osm_mcast_mgr_branch: ERR 0A15: "
716              "Insufficient memory to build multicast tree.\n" );
717
718     /*
719       Deallocate all the work objects on this branch of the tree.
720     */
721     __osm_mcast_mgr_purge_list( p_mgr, p_list );
722     goto Exit;
723   }
724
725   p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
726
727   max_children = osm_mtree_node_get_max_children( p_mtn );
728
729   CL_ASSERT( max_children > 1 );
730
731   /*
732     Prepare an empty list for each port in the switch.
733     TO DO - this list array could probably be moved
734     inside the switch element to save on malloc thrashing.
735   */
736   list_array = cl_zalloc( sizeof(cl_qlist_t) * max_children );
737   if( list_array == NULL )
738   {
739     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
740              "__osm_mcast_mgr_branch: ERR 0A16: "
741              "Unable to allocate list array.\n" );
742     __osm_mcast_mgr_purge_list( p_mgr, p_list );
743     goto Exit;
744   }
745
746   for( i = 0; i < max_children; i++ )
747     cl_qlist_init( &list_array[i] );
748
749   __osm_mcast_mgr_subdivide( p_mgr, p_mgrp, p_sw, p_list, list_array,
750                              max_children );
751
752   p_tbl = osm_switch_get_mcast_tbl_ptr( p_sw );
753
754   /*
755     Add the upstream port to the forwarding table unless
756     we're at the root of the spanning tree.
757   */
758   if( depth > 1 )
759   {
760     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
761     {
762       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
763                "__osm_mcast_mgr_branch: "
764                "Adding upstream port 0x%X.\n", upstream_port );
765     }
766
767     CL_ASSERT( upstream_port );
768     osm_mcast_tbl_set( p_tbl, mlid_ho, upstream_port );
769   }
770
771   /*
772     For each port that was allocated some routes,
773     recurse into this function to continue building the tree
774     if the node on the other end of that port is another switch.
775     Otherwise, the node is an endpoint, and we've found a leaf
776     of the tree.  Mark leaves with our special pointer value.
777   */
778
779   for( i = 0; i < max_children; i++ )
780   {
781     const osm_physp_t      *p_physp;
782     const osm_physp_t      *p_remote_physp;
783     const osm_node_t    *p_node;
784     const osm_node_t    *p_remote_node;
785     osm_switch_t        *p_remote_sw;
786
787     p_port_list = &list_array[i];
788
789     count = cl_qlist_count( p_port_list );
790
791     /*
792       There should be no children routed through the upstream port!
793     */
794     CL_ASSERT( ( upstream_port == 0 ) || ( i != upstream_port) ||
795                ( (i == upstream_port) && (count == 0)) );
796
797     if( count == 0)
798       continue;      /* No routes down this port. */
799
800     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
801     {
802       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
803                "__osm_mcast_mgr_branch: "
804                "Routing %u destination(s) via switch port 0x%X.\n",
805                count, i );
806     }
807
808     /*
809       This port routes frames for this mcast group.  Therefore,
810       set the appropriate bit in the multicast forwarding
811       table for this switch.
812     */
813     osm_mcast_tbl_set( p_tbl, mlid_ho, i );
814     if (i == 0) 
815       /* This means we are adding the switch to the mc group.
816          We do not need to continue looking at the remote port, just 
817          needed to add the port to the table */
818       continue;
819
820     p_node = osm_switch_get_node_ptr( p_sw );
821     p_remote_node = osm_node_get_remote_node( p_node, i, NULL );
822
823     if( osm_node_get_type( p_remote_node ) == IB_NODE_TYPE_SWITCH )
824     {
825       /*
826         Acquire a pointer to the remote switch then recurse.
827       */
828       p_remote_sw = (osm_switch_t*)cl_qmap_get(
829         p_sw_guid_tbl, osm_node_get_node_guid( p_remote_node ) );
830       CL_ASSERT( p_remote_sw );
831
832       p_physp = osm_node_get_physp_ptr( p_node, i );
833       CL_ASSERT( p_physp );
834       CL_ASSERT( osm_physp_is_valid( p_physp ) );
835
836       p_remote_physp = osm_physp_get_remote( p_physp );
837       CL_ASSERT( p_remote_physp );
838       CL_ASSERT( osm_physp_is_valid( p_remote_physp ) );
839
840       p_mtn->child_array[i] = __osm_mcast_mgr_branch(
841         p_mgr, p_mgrp, p_remote_sw,
842         p_port_list, depth,
843         osm_physp_get_port_num( p_remote_physp),
844         p_max_depth );
845     }
846     else
847     {
848       /*
849         The neighbor node is not a switch, so this
850         must be a leaf.
851       */
852       CL_ASSERT( count == 1 );
853
854       p_mtn->child_array[i] = OSM_MTREE_LEAF;
855       p_wobj = (osm_mcast_work_obj_t*)cl_qlist_remove_head(
856         p_port_list );
857
858       CL_ASSERT( cl_is_qlist_empty( p_port_list ) );
859
860       if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
861       {
862         osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
863                  "__osm_mcast_mgr_branch: "
864                  "Found leaf for port 0x%016" PRIx64 ",\n"
865                  "\t\t\t\ton switch port 0x%X.\n",
866                  cl_ntoh64( osm_port_get_guid( p_wobj->p_port ) ), i );
867       }
868
869       __osm_mcast_work_obj_delete( p_wobj );
870     }
871   }
872
873   cl_free( list_array );
874  Exit:
875   OSM_LOG_EXIT( p_mgr->p_log );
876   return( p_mtn );
877 }
878
879 /**********************************************************************
880  **********************************************************************/
881 static ib_api_status_t
882 __osm_mcast_mgr_build_spanning_tree(
883   osm_mcast_mgr_t*         const p_mgr,
884   osm_mgrp_t*              const p_mgrp )
885 {
886   const cl_qmap_t*         p_mcm_tbl;
887   const cl_qmap_t*         p_port_tbl;
888   const osm_port_t*        p_port;
889   const osm_mcm_port_t*    p_mcm_port;
890   uint32_t              num_ports;
891   cl_qlist_t               port_list;
892   osm_switch_t*            p_sw;
893   osm_mcast_work_obj_t*    p_wobj;
894   ib_api_status_t          status = IB_SUCCESS;
895   uint8_t                  max_depth = 0;
896   uint32_t              count;
897
898   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_build_spanning_tree );
899
900   cl_qlist_init( &port_list );
901
902   /*
903     TO DO - for now, just blow away the old tree.
904     In the future we'll need to construct the tree based
905     on multicast forwarding table information if the user wants to
906     preserve existing multicast routes.
907   */
908   __osm_mcast_mgr_purge_tree( p_mgr, p_mgrp );
909
910   p_mcm_tbl = &p_mgrp->mcm_port_tbl;
911   p_port_tbl = &p_mgr->p_subn->port_guid_tbl;
912   num_ports = cl_qmap_count( p_mcm_tbl );
913   if( num_ports == 0 )
914   {
915     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_VERBOSE ) )
916     {
917       osm_log( p_mgr->p_log, OSM_LOG_VERBOSE,
918                "__osm_mcast_mgr_build_spanning_tree: "
919                "MLID 0x%X has no members--nothing to do.\n",
920                cl_ntoh16( osm_mgrp_get_mlid( p_mgrp ) ) );
921     }
922     goto Exit;
923   }
924
925   /*
926     This function builds the single spanning tree recursively.
927     At each stage, the ports to be reached are divided into
928     non-overlapping subsets of member ports that can be reached through
929     a given switch port.  Construction then moves down each
930     branch, and the process starts again with each branch computing
931     for its own subset of the member ports.
932
933     The maximum recursion depth is at worst the maximum hop count in the
934     subnet, which is spec limited to 64.
935   */
936
937   /*
938     Locate the switch around which to create the spanning
939     tree for this multicast group.
940   */
941   p_sw = __osm_mcast_mgr_find_root_switch( p_mgr, p_mgrp );
942   if( p_sw == NULL )
943   {
944     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
945              "__osm_mcast_mgr_build_spanning_tree: ERR 0A08: "
946              "Unable to locate a suitable switch for group 0x%X.\n",
947              cl_ntoh16( osm_mgrp_get_mlid( p_mgrp ) ));
948     status = IB_ERROR;
949     goto Exit;
950   }
951
952   /*
953     Build the first "subset" containing all member ports.
954   */
955   for( p_mcm_port = (osm_mcm_port_t*)cl_qmap_head( p_mcm_tbl );
956        p_mcm_port != (osm_mcm_port_t*)cl_qmap_end( p_mcm_tbl );
957        p_mcm_port = (osm_mcm_port_t*)cl_qmap_next(&p_mcm_port->map_item))
958   {
959     /*
960       Acquire the port object for this port guid, then create
961       the new worker object to build the list.
962     */
963     p_port = (osm_port_t*)cl_qmap_get( p_port_tbl,
964                                        ib_gid_get_guid( &p_mcm_port->port_gid ) );
965
966     if( p_port == (osm_port_t*)cl_qmap_end( p_port_tbl ) )
967     {
968       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
969                "__osm_mcast_mgr_build_spanning_tree: ERR 0A09: "
970                "No port object for port 0x%016" PRIx64 ".\n",
971                cl_ntoh64( ib_gid_get_guid( &p_mcm_port->port_gid ) ) );
972       continue;
973     }
974
975     p_wobj = __osm_mcast_work_obj_new( p_port );
976     if( p_wobj == NULL )
977     {
978       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
979                "__osm_mcast_mgr_build_spanning_tree: ERR 0A10: "
980                "Insufficient memory to route port 0x%016" PRIx64 ".\n",
981                cl_ntoh64( osm_port_get_guid( p_port ) ) );
982       continue;
983     }
984
985     cl_qlist_insert_tail( &port_list, &p_wobj->list_item );
986   }
987
988   count = cl_qlist_count( &port_list );
989   p_mgrp->p_root = __osm_mcast_mgr_branch( p_mgr, p_mgrp, p_sw,
990                                            &port_list, 0, 0, &max_depth );
991
992   osm_log( p_mgr->p_log, OSM_LOG_VERBOSE,
993            "__osm_mcast_mgr_build_spanning_tree: "
994            "Configured MLID 0x%X for %u ports, max tree depth = %u.\n",
995            cl_ntoh16( osm_mgrp_get_mlid( p_mgrp ) ),
996            count, max_depth );
997
998  Exit:
999   OSM_LOG_EXIT( p_mgr->p_log );
1000   return( status );
1001 }
#if 0
/* unused */
/**********************************************************************
   Program the multicast table of the switch behind one tree node.

   The table entry for the group's MLID is cleared first; afterwards
   the port matching every populated child slot of the tree node is
   set for that MLID.
 **********************************************************************/
void
osm_mcast_mgr_set_table(
  IN osm_mcast_mgr_t*         const p_mgr,
  IN const osm_mgrp_t*     const p_mgrp,
  IN const osm_mtree_node_t*  const p_mtn )
{
  uint8_t i;
  uint8_t max_children;
  osm_mtree_node_t* p_child_mtn;
  uint16_t mlid_ho;
  osm_mcast_tbl_t* p_tbl;
  osm_switch_t* p_sw;

  OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_set_table );

  mlid_ho = cl_ntoh16( osm_mgrp_get_mlid( p_mgrp ) );
  p_sw = osm_mtree_node_get_switch_ptr( p_mtn );

  CL_ASSERT( p_sw );

  if( osm_log_is_active( p_mgr->p_log, OSM_LOG_VERBOSE ) )
  {
    /* The node GUID is kept in network order; convert it for display. */
    osm_log( p_mgr->p_log, OSM_LOG_VERBOSE,
             "osm_mcast_mgr_set_table: "
             "Configuring MLID 0x%X on switch 0x%" PRIx64 ".\n",
             mlid_ho,
             cl_ntoh64( osm_node_get_node_guid(
                          osm_switch_get_node_ptr( p_sw ) ) ) );
  }

  /*
    For every child of this tree node, set the corresponding
    bit in the switch's mcast table.
  */
  p_tbl = osm_switch_get_mcast_tbl_ptr( p_sw );
  max_children = osm_mtree_node_get_max_children( p_mtn );

  CL_ASSERT( max_children <= osm_switch_get_num_ports( p_sw ) );

  /* Start from an empty entry so stale ports do not survive. */
  osm_mcast_tbl_clear_mlid( p_tbl, mlid_ho );

  for( i = 0; i < max_children; i++ )
  {
    p_child_mtn = osm_mtree_node_get_child( p_mtn, i );
    if( p_child_mtn == NULL )
      continue;

    /* The child index is used directly as the port number. */
    osm_mcast_tbl_set( p_tbl, mlid_ho, i );
  }

  OSM_LOG_EXIT( p_mgr->p_log );
}
#endif
1058
1059 /**********************************************************************
1060  **********************************************************************/
1061 static void
1062 __osm_mcast_mgr_clear(
1063   IN osm_mcast_mgr_t*         const p_mgr,
1064   IN osm_mgrp_t*           const p_mgrp )
1065 {
1066   osm_switch_t*            p_sw;
1067   cl_qmap_t*               p_tbl;
1068   osm_mcast_tbl_t*         p_mcast_tbl;
1069
1070
1071   OSM_LOG_ENTER( p_mgr->p_log, __osm_mcast_mgr_clear );
1072
1073   /*
1074     Walk the switches and clear the routing entries for
1075     this MLID.
1076   */
1077   p_tbl = &p_mgr->p_subn->sw_guid_tbl;
1078   p_sw = (osm_switch_t*)cl_qmap_head( p_tbl );
1079   while( p_sw != (osm_switch_t*)cl_qmap_end( p_tbl ) )
1080   {
1081     p_mcast_tbl = osm_switch_get_mcast_tbl_ptr( p_sw );
1082     osm_mcast_tbl_clear_mlid( p_mcast_tbl, cl_ntoh16(p_mgrp->mlid) );
1083     p_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
1084   }
1085
1086   OSM_LOG_EXIT( p_mgr->p_log );
1087 }
1088
#if 0
/* TO DO - make this real -- at least update spanning tree */
/**********************************************************************
   Try to attach a single port to an existing multicast tree without
   rebuilding the tree.

   The port is resolved to its physical port, the remote physical
   port and the remote node.  The remote node must be a switch known
   to the subnet.  If that switch is already part of the tree for the
   MLID and the join state asks to receive traffic (full or non
   member), the switch port facing the new member is set in the
   switch's multicast table.  Send-only members need no table change.
   If the switch is not in the tree, nothing is configured.

   Returns IB_ERROR when any object cannot be resolved or the join
   state is unknown, IB_SUCCESS otherwise.

   Lock must be held on entry.
**********************************************************************/
ib_api_status_t
osm_mcast_mgr_process_single(
  IN osm_mcast_mgr_t*         const p_mgr,
  IN ib_net16_t            const mlid,
  IN ib_net64_t            const port_guid,
  IN uint8_t               const join_state )
{
  uint8_t                  port_num;
  uint16_t              mlid_ho;
  osm_switch_t*            p_sw;
  ib_net64_t               sw_guid;
  osm_port_t*              p_port;
  osm_physp_t*          p_physp;
  osm_physp_t*          p_remote_physp;
  osm_node_t*              p_remote_node;
  cl_qmap_t*               p_port_tbl;
  cl_qmap_t*               p_sw_tbl;
  osm_mcast_tbl_t*         p_mcast_tbl;
  ib_api_status_t          status = IB_SUCCESS;

  OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_process_single );

  CL_ASSERT( mlid );
  CL_ASSERT( port_guid );

  p_port_tbl = &p_mgr->p_subn->port_guid_tbl;
  p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
  mlid_ho = cl_ntoh16( mlid );

  /* Guard on the same level the message is emitted at. */
  if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
  {
    osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
             "osm_mcast_mgr_process_single: "
             "Attempting to add port 0x%" PRIx64 " to MLID 0x%X, "
             "\n\t\t\t\tjoin state = 0x%X.\n",
             cl_ntoh64( port_guid ), mlid_ho, join_state );
  }

  /*
    Acquire the Port object.
  */
  p_port = (osm_port_t*)cl_qmap_get( p_port_tbl, port_guid );
  if( p_port == (osm_port_t*)cl_qmap_end( p_port_tbl ) )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A01: "
             "Unable to acquire port object for 0x%" PRIx64 ".\n",
             cl_ntoh64( port_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  /*
    Resolve the member's own physical port.
  */
  p_physp = osm_port_get_default_phys_ptr( p_port );
  if( p_physp == NULL )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A05: "
             "Unable to acquire phsyical port object for 0x%" PRIx64 ".\n",
             cl_ntoh64( port_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  if( !osm_physp_is_valid( p_physp ) )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A07: "
             "Unable to acquire valid physical port object "
             "for 0x%" PRIx64 ".\n",
             cl_ntoh64( port_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  /*
    Follow the link to the physical port on the other end.
  */
  p_remote_physp = osm_physp_get_remote( p_physp );
  if( p_remote_physp == NULL )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A11: "
             "Unable to acquire remote phsyical port object "
             "for 0x%" PRIx64 ".\n",
             cl_ntoh64( port_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  if( !osm_physp_is_valid( p_remote_physp ) )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A21: "
             "Unable to acquire valid remote physical port object "
             "for 0x%" PRIx64 ".\n",
             cl_ntoh64( port_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  p_remote_node = osm_physp_get_node_ptr( p_remote_physp );

  CL_ASSERT( p_remote_node );

  sw_guid = osm_node_get_node_guid( p_remote_node );

  /*
    Only a switch can carry the multicast table entry.
  */
  if( osm_node_get_type( p_remote_node ) != IB_NODE_TYPE_SWITCH )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A22: "
             "Remote node not a switch node 0x%" PRIx64 ".\n",
             cl_ntoh64( sw_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  p_sw = (osm_switch_t*)cl_qmap_get( p_sw_tbl, sw_guid );
  if( p_sw == (osm_switch_t*)cl_qmap_end( p_sw_tbl ) )
  {
    osm_log( p_mgr->p_log, OSM_LOG_ERROR,
             "osm_mcast_mgr_process_single: ERR 0A12: "
             "No switch object 0x%" PRIx64 ".\n",
             cl_ntoh64( sw_guid ) );
    status = IB_ERROR;
    goto Exit;
  }

  if( osm_switch_is_in_mcast_tree( p_sw, mlid_ho ) )
  {
    /*
      We're in luck.  The switch attached to this port
      is already in the multicast group, so we can just
      add the specified port as a new leaf of the tree.
    */
    if( join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_NON ) )
    {
      /*
        This node wants to receive multicast frames.
        Get the switch port number to which the new member port
        is attached, then configure this single mcast table.
      */
      port_num = osm_physp_get_port_num( p_remote_physp );
      CL_ASSERT( port_num );

      p_mcast_tbl = osm_switch_get_mcast_tbl_ptr( p_sw );
      osm_mcast_tbl_set( p_mcast_tbl, mlid_ho, port_num );
    }
    else
    {
      if( join_state & IB_JOIN_STATE_SEND_ONLY )
      {
        if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
        {
          osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
                   "osm_mcast_mgr_process_single: "
                   "Success.  Nothing to do for send"
                   "only member.\n" );
        }
      }
      else
      {
        osm_log( p_mgr->p_log, OSM_LOG_ERROR,
                 "osm_mcast_mgr_process_single: ERR 0A13: "
                 "Unknown join state 0x%X.\n", join_state );
        status = IB_ERROR;
        goto Exit;
      }
    }
  }
  else
  {
    /* The switch is not part of the tree: nothing is configured. */
    if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
    {
      osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
               "osm_mcast_mgr_process_single: "
               "Unable to add port.\n" );
    }
  }

 Exit:
  OSM_LOG_EXIT( p_mgr->p_log );
  return( status );
}
#endif
1275
1276 /**********************************************************************
1277    lock must already be held on entry
1278 **********************************************************************/
1279 ib_api_status_t
1280 osm_mcast_mgr_process_tree(
1281   IN osm_mcast_mgr_t*      const p_mgr,
1282   IN osm_mgrp_t*           const p_mgrp,
1283   IN osm_mcast_req_type_t        req_type,
1284   ib_net64_t                     port_guid )
1285 {
1286   ib_api_status_t          status = IB_SUCCESS;
1287   cl_qmap_t*               p_tbl;
1288   ib_net16_t               mlid;
1289   boolean_t                ui_mcast_fdb_assign_func_defined;
1290
1291   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_process_tree );
1292
1293   mlid = osm_mgrp_get_mlid( p_mgrp );
1294   p_tbl = &p_mgr->p_subn->sw_guid_tbl;
1295
1296   if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
1297   {
1298     osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
1299              "osm_mcast_mgr_process_tree: "
1300              "Processing multicast group 0x%X.\n", cl_ntoh16( mlid ));
1301   }
1302
1303   /*
1304     If there are no switches in the subnet, then we have nothing to do.
1305   */
1306   if( cl_qmap_count( &p_mgr->p_subn->sw_guid_tbl ) == 0 )
1307   {
1308     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
1309     {
1310       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
1311                "osm_mcast_mgr_process_tree: "
1312                "No switches in subnet.  Nothing to do.\n" );
1313     }
1314     goto Exit;
1315   }
1316
1317   if (p_mgr->p_subn->opt.pfn_ui_mcast_fdb_assign)
1318     ui_mcast_fdb_assign_func_defined = TRUE;
1319   else
1320     ui_mcast_fdb_assign_func_defined = FALSE;
1321
1322   /*
1323     Clear the multicast tables to start clean, then build
1324     the spanning tree which sets the mcast table bits for each
1325     port in the group.
1326     We will clean the multicast tables if a ui_mcast function isn't
1327     defined, or if such function is defined, but we got here
1328     through a MC_CREATE request - this means we are creating a new
1329     multicast group - clean all old data.
1330   */
1331   if ( ui_mcast_fdb_assign_func_defined == FALSE ||
1332        req_type == OSM_MCAST_REQ_TYPE_CREATE ) 
1333     __osm_mcast_mgr_clear( p_mgr, p_mgrp );
1334
1335   /* If a UI function is defined, then we will call it here. 
1336      If not - the use the regular build spanning tree function */
1337   if ( ui_mcast_fdb_assign_func_defined == FALSE )
1338   {
1339     status = __osm_mcast_mgr_build_spanning_tree( p_mgr, p_mgrp );
1340     if( status != IB_SUCCESS )
1341     {
1342       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
1343                "osm_mcast_mgr_process_tree: ERR 0A17: "
1344                "Unable to create spanning tree (%s).\n",
1345                ib_get_err_str( status ) );
1346       goto Exit;
1347     }
1348   }
1349   else
1350   {
1351     if( osm_log_is_active( p_mgr->p_log, OSM_LOG_DEBUG ) )
1352     {
1353       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
1354                "osm_mcast_mgr_process_tree: "
1355                "Invoking UI function pfn_ui_mcast_fdb_assign\n");
1356     }
1357     
1358     p_mgr->p_subn->opt.pfn_ui_mcast_fdb_assign(
1359       p_mgr->p_subn->opt.ui_mcast_fdb_assign_ctx,
1360       mlid, req_type, port_guid );
1361   }    
1362   
1363  Exit:
1364   OSM_LOG_EXIT( p_mgr->p_log );
1365   return( status );
1366 }
1367
1368 /**********************************************************************
1369  **********************************************************************/
1370 void
1371 osm_mcast_mgr_dump_mcast_routes(
1372   IN const osm_mcast_mgr_t*   const p_mgr,
1373   IN const osm_switch_t*      const p_sw )
1374 {
1375   osm_mcast_tbl_t*      p_tbl;
1376   int16_t               mlid_ho = 0;
1377   int16_t               mlid_start_ho;
1378   uint8_t               position = 0;
1379   int16_t               block_num = 0;
1380   char                  line[OSM_REPORT_LINE_SIZE];
1381   boolean_t             print_lid;
1382   const osm_node_t*     p_node;
1383   FILE  *               p_mcfdbFile;
1384   uint16_t              i, j;
1385   uint16_t              mask_entry;
1386   char                 *file_name = NULL;
1387
1388   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_dump_mcast_routes );
1389   
1390   if( !osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
1391     goto Exit;
1392
1393   file_name = 
1394     (char*)cl_malloc(strlen(p_mgr->p_subn->opt.dump_files_dir) + 12);
1395   
1396   CL_ASSERT(file_name);
1397   
1398   strcpy(file_name, p_mgr->p_subn->opt.dump_files_dir);
1399   strcat(file_name,"/osm.mcfdbs");
1400   
1401   /* Open the file or error */
1402   p_mcfdbFile = fopen(file_name, "a");
1403   if (! p_mcfdbFile)
1404   {
1405     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
1406              "osm_mcast_mgr_dump_mcast_routes: ERR 0A23: "
1407              "Fail to open mcfdb file (%s).\n",
1408              file_name );
1409     goto Exit;
1410   }
1411
1412   p_node = osm_switch_get_node_ptr( p_sw );
1413
1414   p_tbl = osm_switch_get_mcast_tbl_ptr( p_sw );
1415
1416   fprintf( p_mcfdbFile, "\nSwitch 0x%016" PRIx64 "\n"
1417            "LID    : Out Port(s) \n",
1418            cl_ntoh64( osm_node_get_node_guid( p_node ) ) );  
1419   while ( block_num <= p_tbl->max_block_in_use )
1420   {
1421     mlid_start_ho = (uint16_t)(block_num * IB_MCAST_BLOCK_SIZE);
1422     for (i = 0 ; i < IB_MCAST_BLOCK_SIZE ; i++)
1423     {
1424       mlid_ho = mlid_start_ho + i;
1425       position = 0;
1426       print_lid = FALSE;
1427       sprintf( line, "0x%04X :", mlid_ho + IB_LID_MCAST_START_HO );
1428       while ( position <= p_tbl->max_position )
1429       {
1430         mask_entry = cl_ntoh16((*p_tbl->p_mask_tbl)[mlid_ho][position]);
1431         if (mask_entry == 0)
1432         {
1433           position++;
1434           continue;
1435         }
1436         print_lid = TRUE;
1437         for (j = 0 ; j < 16 ; j++)
1438         {
1439           if ( (1 << j) & mask_entry )
1440             sprintf( line, "%s 0x%03X ", line, j+(position*16) );
1441         }
1442         position++;
1443       }
1444       if (print_lid)
1445       {
1446         fprintf( p_mcfdbFile, "%s\n", line );
1447       }
1448     }
1449     block_num++;
1450   }
1451
1452   fclose(p_mcfdbFile);
1453
1454  Exit:
1455   if (file_name)
1456     cl_free(file_name);
1457   OSM_LOG_EXIT( p_mgr->p_log );
1458 }
1459
1460 /**********************************************************************
1461  Process the entire group.
1462
1463  NOTE : The lock should be held externally!
1464  **********************************************************************/
1465 osm_signal_t
1466 osm_mcast_mgr_process_mgrp(
1467   IN osm_mcast_mgr_t*      const p_mgr,
1468   IN osm_mgrp_t*           const p_mgrp,
1469   IN osm_mcast_req_type_t        req_type,
1470   IN ib_net64_t                  port_guid )
1471 {
1472   osm_signal_t          signal = OSM_SIGNAL_DONE;
1473   ib_api_status_t          status;
1474   osm_switch_t*            p_sw;
1475   cl_qmap_t*               p_tbl;
1476   boolean_t             pending_transactions = FALSE;
1477
1478   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_process_mgrp );
1479
1480   p_tbl = &p_mgr->p_subn->sw_guid_tbl;
1481
1482   status = osm_mcast_mgr_process_tree( p_mgr, p_mgrp, req_type, port_guid );
1483   if( status != IB_SUCCESS )
1484   {
1485     osm_log( p_mgr->p_log, OSM_LOG_ERROR,
1486              "osm_mcast_mgr_process_mgrp: ERR 0A19: "
1487              "Unable to create spanning tree (%s).\n",
1488              ib_get_err_str( status ) );
1489
1490     goto Exit;
1491   }
1492
1493   /* initialize the mc fdb dump file: */
1494   if( osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
1495     unlink("/tmp/osm.mcfdbs");
1496
1497   /*
1498     Walk the switches and download the tables for each.
1499   */
1500   p_sw = (osm_switch_t*)cl_qmap_head( p_tbl );
1501   while( p_sw != (osm_switch_t*)cl_qmap_end( p_tbl ) )
1502   {
1503     signal = __osm_mcast_mgr_set_tbl( p_mgr, p_sw );
1504     if( signal == OSM_SIGNAL_DONE_PENDING )
1505       pending_transactions = TRUE;
1506
1507     osm_mcast_mgr_dump_mcast_routes( p_mgr, p_sw );
1508
1509     p_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
1510   }
1511
1512  Exit:
1513   OSM_LOG_EXIT( p_mgr->p_log );
1514
1515   if( pending_transactions == TRUE )
1516     return( OSM_SIGNAL_DONE_PENDING );
1517   else
1518     return( OSM_SIGNAL_DONE );
1519 }
1520
1521 /**********************************************************************
1522  **********************************************************************/
1523 osm_signal_t
1524 osm_mcast_mgr_process(
1525   IN osm_mcast_mgr_t*         const p_mgr )
1526 {
1527   osm_signal_t          signal;
1528   osm_switch_t*            p_sw;
1529   cl_qmap_t*               p_tbl;
1530   cl_qmap_t*               p_mcast_tbl;
1531   osm_mgrp_t*              p_mgrp;
1532   ib_api_status_t          status;
1533   boolean_t             pending_transactions = FALSE;
1534
1535   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_process );
1536
1537   p_tbl = &p_mgr->p_subn->sw_guid_tbl;
1538
1539   p_mcast_tbl = &p_mgr->p_subn->mgrp_mlid_tbl;
1540   /*
1541     While holding the lock, iterate over all the established
1542     multicast groups, servicing each in turn.
1543
1544     Then, download the multicast tables to the switches.
1545   */
1546   CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
1547
1548   p_mgrp = (osm_mgrp_t*)cl_qmap_head( p_mcast_tbl );
1549   while( p_mgrp != (osm_mgrp_t*)cl_qmap_end( p_mcast_tbl ) )
1550   {
1551     /* We reached here due to some change that caused a heavy sweep
1552        of the subnet. Not due to a specific multicast request.
1553        So the request type is subnet_change and the port guid is 0. */
1554     status = osm_mcast_mgr_process_tree( p_mgr, p_mgrp,
1555                                          OSM_MCAST_REQ_TYPE_SUBNET_CHANGE, 0);
1556     if( status != IB_SUCCESS )
1557     {
1558       osm_log( p_mgr->p_log, OSM_LOG_ERROR,
1559                "osm_mcast_mgr_process: ERR 0A20: "
1560                "Unable to create spanning tree (%s).\n",
1561                ib_get_err_str( status ) );
1562     }
1563
1564     p_mgrp = (osm_mgrp_t*)cl_qmap_next( &p_mgrp->map_item );
1565   }
1566
1567   /* initialize the mc fdb dump file: */
1568   if( osm_log_is_active( p_mgr->p_log, OSM_LOG_ROUTING ) )
1569     unlink("/tmp/osm.mcfdbs");
1570
1571   /*
1572     Walk the switches and download the tables for each.
1573   */
1574   p_sw = (osm_switch_t*)cl_qmap_head( p_tbl );
1575   while( p_sw != (osm_switch_t*)cl_qmap_end( p_tbl ) )
1576   {
1577     signal = __osm_mcast_mgr_set_tbl( p_mgr, p_sw );
1578     if( signal == OSM_SIGNAL_DONE_PENDING )
1579       pending_transactions = TRUE;
1580
1581     osm_mcast_mgr_dump_mcast_routes( p_mgr, p_sw );
1582
1583     p_sw = (osm_switch_t*)cl_qmap_next( &p_sw->map_item );
1584   }
1585
1586   CL_PLOCK_RELEASE( p_mgr->p_lock );
1587
1588   OSM_LOG_EXIT( p_mgr->p_log );
1589
1590   if( pending_transactions == TRUE )
1591     return( OSM_SIGNAL_DONE_PENDING );
1592   else
1593     return( OSM_SIGNAL_DONE );
1594 }
1595
1596 /**********************************************************************
1597  **********************************************************************/
1598
1599 static
1600 osm_mgrp_t *
1601 __get_mgrp_by_mlid(
1602   IN osm_mcast_mgr_t* const p_mgr,
1603   IN ib_net16_t const mlid)
1604 {
1605   cl_map_item_t *map_item;
1606
1607   map_item = cl_qmap_get(&p_mgr->p_subn->mgrp_mlid_tbl, mlid);
1608   if(map_item == cl_qmap_end(&p_mgr->p_subn->mgrp_mlid_tbl))
1609   {
1610     return NULL;
1611   }
1612   return (osm_mgrp_t *)map_item;
1613 }
1614
1615 /**********************************************************************
1616   This is the function that is invoked during idle time to handle the 
1617   process request. Context1 is simply the osm_mcast_mgr_t*, Context2
1618   hold the mlid, port guid and action (join/leave/delete) required.
1619  **********************************************************************/
1620 osm_signal_t
1621 osm_mcast_mgr_process_mgrp_cb(
1622   IN void*              const Context1,
1623   IN void*              const Context2 )
1624 {
1625   osm_mcast_mgr_t* p_mgr = (osm_mcast_mgr_t*)Context1;
1626   osm_mgrp_t* p_mgrp;
1627   ib_net16_t  mlid;
1628   osm_signal_t signal;
1629   osm_mcast_mgr_ctxt_t* p_ctxt = (osm_mcast_mgr_ctxt_t*)Context2;
1630   osm_mcast_req_type_t req_type = p_ctxt->req_type;
1631   ib_net64_t port_guid = p_ctxt->port_guid;
1632
1633   OSM_LOG_ENTER( p_mgr->p_log, osm_mcast_mgr_process_mgrp_cb );
1634  
1635   /* nice copy no warning on size diff */
1636   cl_memcpy(&mlid, &p_ctxt->mlid, sizeof(mlid));
1637
1638   /* we can destroy the context now */
1639   cl_free(p_ctxt);
1640
1641   /* we need a lock to make sure the p_mgrp is not change other ways */
1642   CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
1643   p_mgrp = __get_mgrp_by_mlid( p_mgr, mlid);
1644
1645   /* since we delayed the execution we prefer to pass the
1646      mlid as the mgrp identifier and then find it or abort */
1647
1648   if (p_mgrp)
1649   {
1650
1651     /* if there was no change from the last time we processed the group
1652        we can skip doing anything
1653     */
1654     if ( p_mgrp->last_change_id == p_mgrp->last_tree_id)
1655     {
1656       signal = OSM_SIGNAL_DONE;
1657       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
1658                "osm_mcast_mgr_process_mgrp_cb: "
1659                "Skip processing mgrp with lid:0x%X change id:%u \n",
1660                cl_ntoh16(mlid), p_mgrp->last_change_id );
1661     }
1662     else
1663     {
1664       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
1665                "osm_mcast_mgr_process_mgrp_cb: "
1666                "Processing mgrp with lid:0x%X change id:%u \n",
1667                cl_ntoh16(mlid), p_mgrp->last_change_id );
1668
1669       signal =
1670         osm_mcast_mgr_process_mgrp( p_mgr, p_mgrp, req_type, port_guid );
1671       p_mgrp->last_tree_id = p_mgrp->last_change_id;
1672     }
1673     CL_PLOCK_RELEASE( p_mgr->p_lock );
1674
1675     /* Remove MGRP only if osm_mcm_port_t count is 0 and
1676      * Not a well known group
1677      */
1678     if((0x0 == cl_qmap_count(&p_mgrp->mcm_port_tbl)) &&
1679        (p_mgrp->well_known == FALSE))
1680     {
1681       osm_log( p_mgr->p_log, OSM_LOG_DEBUG,
1682                "osm_mcast_mgr_process_mgrp_cb: "
1683                "Destroying mgrp with lid:0x%X \n",
1684                cl_ntoh16(mlid) );
1685
1686      /* Send a Report to any InformInfo registerd for
1687          Trap 67 : MCGroup delete */
1688       osm_mgrp_send_delete_notice( p_mgr->p_subn, p_mgr->p_log, p_mgrp );
1689
1690       CL_PLOCK_EXCL_ACQUIRE( p_mgr->p_lock );
1691       cl_qmap_remove_item(&p_mgr->p_subn->mgrp_mlid_tbl,
1692                           (cl_map_item_t *)p_mgrp );
1693
1694       osm_mgrp_destroy(p_mgrp);
1695       CL_PLOCK_RELEASE( p_mgr->p_lock );
1696     }
1697     /* no need for CL_PLOCK_RELEASE( p_mgr->p_lock ) - internally done */
1698     OSM_LOG_EXIT( p_mgr->p_log );
1699     return signal;
1700   }
1701   else
1702   {
1703     CL_PLOCK_RELEASE( p_mgr->p_lock );
1704     OSM_LOG_EXIT( p_mgr->p_log );
1705     return OSM_SIGNAL_DONE;
1706   }
1707
1708 }