[ipoib] mcast garbage collector and igmp V2 support.
author tzachid <tzachid@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Sun, 27 Jul 2008 07:48:26 +0000 (07:48 +0000)
committer tzachid <tzachid@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Sun, 27 Jul 2008 07:48:26 +0000 (07:48 +0000)
signed by: Slava Strebkov [slavas@voltaire.com]

git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1450 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

inc/kernel/ip_packet.h
ulp/ipoib/kernel/ipoib_adapter.c
ulp/ipoib/kernel/ipoib_adapter.h
ulp/ipoib/kernel/ipoib_driver.c
ulp/ipoib/kernel/ipoib_endpoint.h
ulp/ipoib/kernel/ipoib_port.c
ulp/ipoib/kernel/ipoib_port.h
ulp/ipoib/kernel/netipoib.inx

index 481cb9c..cb887b7 100644 (file)
@@ -196,6 +196,7 @@ typedef struct _arp_pkt
 #define IP_PROT_IP                     4\r
 #define IP_PROT_TCP                    6\r
 #define IP_PROT_UDP                    17\r
+#define IP_PROT_IGMP           2\r
 \r
 \r
 #include <complib/cl_packon.h>\r
@@ -355,6 +356,55 @@ typedef struct _udp_hdr
 *********/\r
 #include <complib/cl_packoff.h>\r
 \r
+#define IGMP_V2_MEMBERSHIP_QUERY       0x11\r
+#define IGMP_V2_MEMBERSHIP_REPORT      0x16\r
+#define IGMP_V1_MEMBERSHIP_REPORT      0x12    // for backward compatibility with IGMPv1\r
+#define IGMP_V2_LEAVE_GROUP                    0x17\r
+#include <complib/cl_packon.h>\r
+/****s* IB Network Drivers/igmp_v2_hdr_t\r
+* NAME\r
+*      igmp_v2_hdr_t\r
+*\r
+* DESCRIPTION\r
+*      Defines the IGMPv2 header for IP packets.\r
+*\r
+* SYNOPSIS\r
+*/\r
+typedef struct _igmp_v2_hdr\r
+{\r
+       uint8_t         type;\r
+       uint8_t         max_resp_time;\r
+       net16_t         chksum;\r
+       net32_t         group_address;\r
+}      PACK_SUFFIX igmp_v2_hdr_t;\r
+/*\r
+* FIELDS\r
+*      type\r
+*              Type of IGMPv2 message: query, report or leave (see the\r
+*              IGMP_V2_xxx and IGMP_V1_MEMBERSHIP_REPORT constants).\r
+*\r
+*      max_resp_time\r
+*              The Max Response Time field is meaningful only in Membership Query\r
+*              messages, and specifies the maximum allowed time before sending a\r
+*              responding report in units of 1/10 second.  In all other messages, it\r
+*              is set to zero by the sender and ignored by receivers.\r
+*\r
+*      chksum\r
+*              The checksum is the 16-bit one's complement of the one's complement\r
+*              sum of the whole IGMP message (the entire IP payload).\r
+*\r
+*      group_address\r
+*              In a Membership Query message, the group address field is set to zero\r
+*              when sending a General Query, and set to the group address being\r
+*              queried when sending a Group-Specific Query.\r
+*\r
+*              In a Membership Report or Leave Group message, the group address\r
+*              field holds the IP multicast group address of the group being\r
+*              reported or left.\r
+*\r
+* SEE ALSO\r
+*      IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t\r
+*********/\r
+#include <complib/cl_packoff.h>\r
 \r
 #define DHCP_PORT_SERVER               CL_HTON16(67)\r
 #define DHCP_PORT_CLIENT               CL_HTON16(68)\r
index c7d3e35..a563a27 100644 (file)
@@ -762,8 +762,13 @@ ipoib_refresh_mcast(
 \r
                        if( j != p_adapter->mcast_array_size )\r
                                continue;\r
-\r
-                       ipoib_port_join_mcast( p_port, p_mac_array[i] ,IB_MC_REC_STATE_FULL_MEMBER);\r
+                       if ( ( p_mac_array[i].addr[0] == 1 && p_mac_array[i].addr[1] == 0 && p_mac_array[i].addr[2] == 0x5e &&\r
+                                  p_mac_array[i].addr[3] == 0 && p_mac_array[i].addr[4] == 0 && p_mac_array[i].addr[5] == 1 ) ||\r
+                                 !( p_mac_array[i].addr[0] == 1 && p_mac_array[i].addr[1] == 0 && p_mac_array[i].addr[2] == 0x5e )\r
+                               )\r
+                       {\r
+                               ipoib_port_join_mcast( p_port, p_mac_array[i], IB_MC_REC_STATE_FULL_MEMBER );\r
+                       }\r
                }\r
        }\r
 \r
index b895b5f..c23a342 100644 (file)
@@ -75,7 +75,7 @@ typedef struct _ipoib_params
        uint32_t        payload_mtu;\r
        uint32_t        xfer_block_size;\r
        mac_addr_t      conf_mac;\r
-\r
+       uint32_t        mc_leave_rescan;\r
 }      ipoib_params_t;\r
 /*\r
 * FIELDS\r
index c8f3431..3d1056f 100644 (file)
@@ -153,7 +153,8 @@ IPOIB_REG_ENTRY HCARegTable[] = {
        {NDIS_STRING_CONST("SaTimeout"),        1, IPOIB_OFFSET(sa_timeout),            IPOIB_SIZE(sa_timeout),         1000,       250,    UINT_MAX},\r
        {NDIS_STRING_CONST("SaRetries"),        1, IPOIB_OFFSET(sa_retry_cnt),          IPOIB_SIZE(sa_retry_cnt),       10,         1,      UINT_MAX},\r
        {NDIS_STRING_CONST("RecvRatio"),        1, IPOIB_OFFSET(recv_pool_ratio),       IPOIB_SIZE(recv_pool_ratio),    1,          1,      10},\r
-       {NDIS_STRING_CONST("PayloadMtu"),       1, IPOIB_OFFSET(payload_mtu),           IPOIB_SIZE(payload_mtu),        2044,         60,   4092}\r
+       {NDIS_STRING_CONST("PayloadMtu"),       1, IPOIB_OFFSET(payload_mtu),           IPOIB_SIZE(payload_mtu),        2044,       60,   4092},\r
+       {NDIS_STRING_CONST("MCLeaveRescan"),    1, IPOIB_OFFSET(mc_leave_rescan),       IPOIB_SIZE(mc_leave_rescan),    260,        1,    3600}\r
 };  \r
 \r
 #define IPOIB_NUM_REG_PARAMS (sizeof (HCARegTable) / sizeof(IPOIB_REG_ENTRY))\r
index 6887fd6..30d1edf 100644 (file)
@@ -62,7 +62,8 @@ typedef struct _ipoib_endpt
        ib_av_handle_t                  h_av;\r
        boolean_t                               expired;\r
        ib_al_ifc_t                             *p_ifc;\r
-\r
+       boolean_t                       is_in_use;\r
+       boolean_t                               is_mcast_listener;\r
 }      ipoib_endpt_t;\r
 /*\r
 * FIELDS\r
index 1e0f946..93bb194 100644 (file)
@@ -67,7 +67,8 @@ ib_gid_t      bcast_mgid_template = {
 ipoib_port_t   *gp_ipoib_port;\r
 #endif\r
 \r
-\r
+static void __port_mcast_garbage_dpc(KDPC *p_gc_dpc,void *context,void *s_arg1, void *s_arg2);\r
+static void __port_do_mcast_garbage(ipoib_port_t* const        p_port );\r
 /******************************************************************************\r
 *\r
 * Declarations\r
@@ -290,6 +291,14 @@ __send_mgr_filter_ip(
        IN                              size_t                                          buf_len,\r
        IN      OUT                     ipoib_send_desc_t* const        p_desc );\r
 \r
+static NDIS_STATUS\r
+__send_mgr_filter_igmp_v2(\r
+       IN                              ipoib_port_t* const                     p_port,\r
+    IN         const   ip_hdr_t* const                         p_ip_hdr,\r
+       IN                              size_t                                          iph_options_size,\r
+       IN                              NDIS_BUFFER*                            p_buf,\r
+       IN                              size_t                                          buf_len );\r
+\r
 static NDIS_STATUS\r
 __send_mgr_filter_udp(\r
        IN                              ipoib_port_t* const                     p_port,\r
@@ -490,6 +499,13 @@ inline void ipoib_port_deref(ipoib_port_t * p_port, int type)
 #endif\r
 }\r
 \r
+/*\r
+ * Returns a pointer to the first byte that follows the IP header.\r
+ * The header length is read from the low nibble of ver_hl and scaled by 4\r
+ * to get a byte count, so any IP options are skipped as well.\r
+ * Assumes the payload lives in the same buffer as the IP header.\r
+ */\r
+static void* GetIpPayloadPtr(const     ip_hdr_t* const p_ip_hdr)\r
+{\r
+       const size_t    hdr_bytes = (size_t)(p_ip_hdr->ver_hl & 0xf) << 2;\r
+\r
+       return (void*)((uint8_t*)p_ip_hdr + hdr_bytes);\r
+}\r
 \r
 /******************************************************************************\r
 *\r
@@ -653,6 +669,9 @@ __port_init(
                        p_adapter->p_ifc->get_err_str( status )) );\r
                return status;\r
        }\r
+        /* Initialize multicast garbage collector timer and DPC object */\r
+        KeInitializeDpc(&p_port->gc_dpc,(PKDEFERRED_ROUTINE)__port_mcast_garbage_dpc,p_port);\r
+        KeInitializeTimerEx(&p_port->gc_timer,SynchronizationTimer);\r
 \r
        /* We only ever destroy from the PnP callback thread. */\r
        cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,\r
@@ -748,6 +767,8 @@ __port_free(
 \r
        p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );\r
 \r
+       KeCancelTimer(&p_port->gc_timer);\r
+       KeFlushQueuedDpcs();\r
        __endpt_mgr_destroy( p_port );\r
        __recv_mgr_destroy( p_port );\r
        __send_mgr_destroy( p_port );\r
@@ -2133,6 +2154,9 @@ __recv_gen(
        p_eth->hdr.src = p_src->mac;\r
        p_eth->hdr.dst = p_dst->mac;\r
 \r
+       if (p_dst->h_mcast) {\r
+               p_dst->is_in_use = TRUE;\r
+       }\r
        IPOIB_EXIT( IPOIB_DBG_RECV );\r
        return IB_SUCCESS;\r
 }\r
@@ -3106,6 +3130,26 @@ __send_mgr_filter_ip(
        if( p_ip_hdr->offset ||\r
                p_ip_hdr->prot != IP_PROT_UDP )\r
        {\r
+               /* Check if this packet is IGMP */\r
+               if ( p_ip_hdr->prot == IP_PROT_IGMP ) \r
+               {\r
+                       /*\r
+                           In igmp packet I saw that iph arrive in 2 NDIS_BUFFERs:\r
+                               1. iph\r
+                               2. ip options\r
+                               So to get the IGMP packet we need to skip the ip options NDIS_BUFFER\r
+                       */\r
+                       size_t iph_size_in_bytes = (p_ip_hdr->ver_hl & 0xf) * 4;\r
+                       size_t iph_options_size = iph_size_in_bytes - buf_len;\r
+                       buf_len -= sizeof(ip_hdr_t);//without ipheader\r
+\r
+                       /*\r
+                           Could be a case that arrived igmp packet not from type IGMPv2 ,\r
+                               but IGMPv1 or IGMPv3.\r
+                               We anyway pass it to __send_mgr_filter_igmp_v2().\r
+                       */\r
+                       __send_mgr_filter_igmp_v2(p_port, p_ip_hdr, iph_options_size, p_buf, buf_len);\r
+               }\r
                /* Not a UDP packet. */\r
                cl_perf_start( SendTcp );\r
                status = __send_gen( p_port, p_desc );\r
@@ -3125,6 +3169,133 @@ __send_mgr_filter_ip(
        return status;\r
 }\r
 \r
+static NDIS_STATUS\r
+__send_mgr_filter_igmp_v2(\r
+       IN                              ipoib_port_t* const                     p_port,\r
+       IN              const   ip_hdr_t* const                         p_ip_hdr,\r
+       IN                              size_t                                          iph_options_size,\r
+       IN                              NDIS_BUFFER*                            p_buf,\r
+       IN                              size_t                                          buf_len )\r
+{\r
+       igmp_v2_hdr_t           *p_igmp_v2_hdr = NULL;\r
+       NDIS_STATUS                     endpt_status;\r
+       ipoib_endpt_t*          p_endpt = NULL;\r
+       mac_addr_t                      fake_mcast_mac;\r
+\r
+       IPOIB_ENTER( IPOIB_DBG_SEND );\r
+\r
+       IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,\r
+                        ("buf_len = %d,iph_options_size = %d\n",(int)buf_len,(int)iph_options_size ) );\r
+\r
+       if( !buf_len )\r
+       {\r
+               // To get the IGMP packet we need to skip the ip options NDIS_BUFFER (if exists)\r
+               while ( iph_options_size )\r
+               {\r
+                       NdisGetNextBuffer( p_buf, &p_buf );\r
+                       if( !p_buf )\r
+                       {\r
+                               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                                       ("Failed to get IGMPv2 header buffer.\n") );\r
+                               return NDIS_STATUS_FAILURE;\r
+                       }\r
+                       NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len, NormalPagePriority );\r
+                       if( !p_igmp_v2_hdr )\r
+                       {\r
+                               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                                       ("Failed to query IGMPv2 header buffer.\n") );\r
+                               return NDIS_STATUS_FAILURE;\r
+                       }\r
+                       iph_options_size-=buf_len;\r
+               }\r
+        \r
+               NdisGetNextBuffer( p_buf, &p_buf );\r
+               if( !p_buf )\r
+               {\r
+                       IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                               ("Failed to get IGMPv2 header buffer.\n") );\r
+                       return NDIS_STATUS_FAILURE;\r
+               }\r
+               NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len, NormalPagePriority );\r
+               if( !p_igmp_v2_hdr )\r
+               {\r
+                       IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                               ("Failed to query IGMPv2 header buffer.\n") );\r
+                       return NDIS_STATUS_FAILURE;\r
+               }\r
+       }\r
+       else\r
+       {\r
+               /* assuming ip header and options are in the same packet */\r
+               p_igmp_v2_hdr = GetIpPayloadPtr(p_ip_hdr);\r
+       }\r
+       /* Get the IGMP header length. */\r
+       if( buf_len < sizeof(igmp_v2_hdr_t) )\r
+       {\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                       ("Buffer not large enough for IGMPv2 packet.\n") );\r
+               return NDIS_STATUS_BUFFER_TOO_SHORT;\r
+       }\r
+\r
+       // build fake mac from igmp packet group address\r
+       fake_mcast_mac.addr[0] = 1;\r
+       fake_mcast_mac.addr[1] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;\r
+       fake_mcast_mac.addr[2] = 0x5E;\r
+       fake_mcast_mac.addr[3] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[1];\r
+       fake_mcast_mac.addr[4] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[2];\r
+       fake_mcast_mac.addr[5] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[3];\r
+\r
+       switch ( p_igmp_v2_hdr->type )\r
+       {\r
+       case IGMP_V2_MEMBERSHIP_REPORT:\r
+               /* \r
+                       This mean that some body open listener on this group \r
+                       Change type of mcast endpt to SEND_RECV endpt. So mcast garbage collector \r
+                       will not delete this mcast endpt.\r
+               */\r
+               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,\r
+                       ("Catched IGMP_V2_MEMBERSHIP_REPORT message\n") );\r
+               endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );\r
+               if ( p_endpt )\r
+               {\r
+                       cl_obj_lock( &p_port->obj );\r
+                       p_endpt->is_mcast_listener = TRUE;\r
+                       cl_obj_unlock( &p_port->obj );\r
+            ipoib_endpt_deref( p_endpt );\r
+               }\r
+               break;\r
+\r
+       case IGMP_V2_LEAVE_GROUP:\r
+               /* \r
+                       This mean that somebody CLOSE listener on this group .\r
+                   Change type of mcast endpt to SEND_ONLY endpt. So mcast \r
+                       garbage collector will delete this mcast endpt next time.\r
+               */\r
+               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,\r
+                            ("Catched IGMP_V2_LEAVE_GROUP message\n") );\r
+               endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );\r
+               if ( p_endpt )\r
+               {\r
+                       cl_obj_lock( &p_port->obj );\r
+                       p_endpt->is_mcast_listener = FALSE;\r
+                       p_endpt->is_in_use = FALSE;\r
+                       cl_obj_unlock( &p_port->obj );\r
+                       ipoib_endpt_deref( p_endpt );\r
+               }\r
+\r
+               __port_do_mcast_garbage(p_port);\r
+\r
+               break;\r
+\r
+       default:\r
+               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,\r
+                            ("Send Unknown IGMP message: 0x%x \n", p_igmp_v2_hdr->type ) );\r
+               break;\r
+       }\r
+\r
+       IPOIB_EXIT( IPOIB_DBG_SEND );\r
+       return NDIS_STATUS_SUCCESS;\r
+}\r
 \r
 static NDIS_STATUS\r
 __send_mgr_filter_udp(\r
@@ -3163,7 +3334,7 @@ __send_mgr_filter_udp(
        }\r
        else\r
        {\r
-               p_udp_hdr = (udp_hdr_t*)(p_ip_hdr + 1);\r
+               p_udp_hdr = (udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);\r
        }\r
        /* Get the UDP header and check the destination port numbers. */\r
        if( buf_len < sizeof(udp_hdr_t) )\r
@@ -3211,7 +3382,6 @@ __send_mgr_filter_udp(
        return status;\r
 }\r
 \r
-\r
 unsigned short ipchksum(unsigned short *ip, int len)\r
 {\r
     unsigned long sum = 0;\r
@@ -3597,6 +3767,14 @@ __send_mgr_queue(
                        return NDIS_STATUS_PENDING;\r
                }\r
        }\r
+       else if ( status == NDIS_STATUS_SUCCESS && \r
+                         ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) &&  \r
+                         !ETH_IS_BROADCAST( p_eth_hdr->dst.addr ) )\r
+       {\r
+               CL_ASSERT( (*pp_endpt) );\r
+               CL_ASSERT((*pp_endpt)->h_mcast != NULL);\r
+               (*pp_endpt)->is_in_use = TRUE;\r
+       }\r
 \r
        IPOIB_EXIT( IPOIB_DBG_SEND );\r
        return status;\r
@@ -3775,6 +3953,44 @@ ipoib_port_send(
                }\r
 \r
                cl_perf_start( SendMgrQueue );\r
+\r
+               if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) && \r
+                        p_eth_hdr->type == ETH_PROT_TYPE_IP &&\r
+                        !ETH_IS_BROADCAST( p_eth_hdr->dst.addr ) ) \r
+               {\r
+                       ip_hdr_t                        *p_ip_hdr;\r
+                       NDIS_BUFFER                     *p_ip_hdr_buf;\r
+                       UINT                            ip_hdr_buf_len;\r
+\r
+                       // Extract the ip hdr \r
+                       NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );\r
+                       if( !p_ip_hdr_buf )\r
+                       {\r
+                               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                                       ("Failed to get IP header buffer.\n") );\r
+                               goto h_end;\r
+                       }\r
+       \r
+                       NdisQueryBufferSafe( p_ip_hdr_buf, &p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );\r
+                       if( !p_ip_hdr )\r
+                       {\r
+                               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                                       ("Failed to query IP header buffer.\n") );\r
+                               goto h_end;\r
+                       }\r
+\r
+                       if( ip_hdr_buf_len < sizeof(ip_hdr_t) )\r
+                       {\r
+                               /* This buffer is done for.  Get the next buffer. */\r
+                               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                                       ("Buffer too small for IP packet.\n") );\r
+                               goto h_end;\r
+                       }\r
+               \r
+                       p_eth_hdr->dst.addr[1] = ((unsigned char*)&p_ip_hdr->dst_ip)[0] & 0x0f;\r
+                       p_eth_hdr->dst.addr[3] = ((unsigned char*)&p_ip_hdr->dst_ip)[1];\r
+               }\r
+h_end:\r
                status = __send_mgr_queue( p_port, p_eth_hdr, &desc.p_endpt );\r
                cl_perf_stop( &p_port->p_adapter->perf, SendMgrQueue );\r
                if( status == NDIS_STATUS_PENDING )\r
@@ -4600,6 +4816,7 @@ __endpt_mgr_add_bcast(
                return IB_INSUFFICIENT_RESOURCES;\r
        }\r
        /* set reference to transport to be used while is not attached to the port */\r
+       p_endpt->is_mcast_listener = TRUE;\r
        p_endpt->p_ifc = p_port->p_adapter->p_ifc;\r
        status = ipoib_endpt_set_mcast( p_endpt, p_port->ib_mgr.h_pd,\r
                p_port->port_num, p_mcast_rec );\r
@@ -5251,6 +5468,10 @@ ipoib_port_down(
        KeWaitForSingleObject(\r
                &p_port->sa_event, Executive, KernelMode, FALSE, NULL );\r
 \r
+       /* garbage collector timer is not needed when link is down */\r
+       KeCancelTimer(&p_port->gc_timer);\r
+       KeFlushQueuedDpcs();\r
+\r
        /*\r
         * Put the QP in the error state.  This removes the need to\r
         * synchronize with send/receive callbacks.\r
@@ -5292,6 +5513,7 @@ __bcast_cb(
 {\r
        ipoib_port_t    *p_port;\r
        ib_api_status_t status;\r
+       LARGE_INTEGER   gc_due_time;\r
 \r
        IPOIB_ENTER( IPOIB_DBG_INIT );\r
 \r
@@ -5403,6 +5625,11 @@ err:
        /* Notify the adapter that we now have an active connection. */\r
        ipoib_set_active( p_port->p_adapter );\r
 \r
+       /* garbage collector timer is needed when link is active */\r
+       gc_due_time.QuadPart = -(int64_t)(((uint64_t)p_port->p_adapter->params.mc_leave_rescan * 2000000) * 10);\r
+       KeSetTimerEx(&p_port->gc_timer,gc_due_time,\r
+                           (LONG)p_port->p_adapter->params.mc_leave_rescan*1000,&p_port->gc_dpc);\r
+\r
        KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );\r
        ipoib_port_deref( p_port, ref_join_bcast );\r
        IPOIB_EXIT( IPOIB_DBG_INIT );\r
@@ -5529,15 +5756,15 @@ ipoib_port_join_mcast(
        mcast_req.member_rec.mlid = 0;\r
        ib_member_set_state( &mcast_req.member_rec.scope_state,state);\r
 \r
-       if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] == 0x5E )\r
+       if( (mac.addr[0] == 1) && (mac.addr[2] == 0x5E ))\r
        {\r
                /*\r
                 * Update the address portion of the MGID with the 28 lower bits of the\r
-                * IP address.  Since we're given a MAC address, we end up using only\r
-                * the 24 lower bits of that network-byte-ordered value (assuming MSb\r
-                * is zero).\r
+                * IP address.  Since we're given a MAC address, we are using \r
+                * 24 lower bits of that network-byte-ordered value (assuming MSb\r
+                * is zero) and 4 lsb bits of the first byte of IP address.\r
                 */\r
-               mcast_req.member_rec.mgid.raw[12] = 0;\r
+               mcast_req.member_rec.mgid.raw[12] = mac.addr[1];\r
                mcast_req.member_rec.mgid.raw[13] = mac.addr[3];\r
                mcast_req.member_rec.mgid.raw[14] = mac.addr[4];\r
                mcast_req.member_rec.mgid.raw[15] = mac.addr[5];\r
@@ -5695,6 +5922,8 @@ __mcast_cb(
                        &p_port->endpt_mgr.lid_endpts, p_endpt->dlid, &p_endpt->lid_item );\r
                CL_ASSERT( p_qitem == &p_endpt->lid_item );\r
        }\r
+       /* set flag that endpoint is in use */\r
+       p_endpt->is_in_use = TRUE;\r
        cl_obj_unlock( &p_port->obj );\r
        \r
        /* Try to send all pending sends. */\r
@@ -5751,6 +5980,82 @@ __leave_error_mcast_cb(
        IPOIB_EXIT( IPOIB_DBG_MCAST );\r
 }\r
 \r
+static void __port_do_mcast_garbage(ipoib_port_t* const        p_port)\r
+{\r
+    const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00, 0x00, 0x01};\r
+       /* Do garbage collecting... */\r
 \r
+       cl_map_item_t   *p_item;\r
+       ipoib_endpt_t   *p_endpt;\r
+       cl_qlist_t              destroy_mc_list;\r
+       uint8_t                 cnt;\r
+       const static GC_MAX_LEAVE_NUM = 80;\r
 \r
+       cl_qlist_init( &destroy_mc_list );\r
+\r
+       cl_obj_lock( &p_port->obj );\r
+       cnt = 0;\r
+       p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );\r
+       while( (p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts )) && (cnt < GC_MAX_LEAVE_NUM))\r
+       {\r
+               p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );\r
+               p_item = cl_qmap_next( p_item );\r
+\r
+               /* Check if the current endpoint is not a multicast listener */\r
+\r
+               if( p_endpt->h_mcast && \r
+                       (!p_endpt->is_mcast_listener) &&\r
+                       ( cl_memcmp( &p_endpt->mac, &DEFAULT_MCAST_GROUP, sizeof(mac_addr_t) ) &&\r
+                        (!p_endpt->is_in_use) ))\r
+               {\r
+                       cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,\r
+                               &p_endpt->mac_item );\r
+                       cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,\r
+                               &p_endpt->gid_item );\r
+\r
+                       if( p_endpt->dlid )\r
+                       {\r
+                               cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,\r
+                                       &p_endpt->lid_item );\r
+                               p_endpt->dlid = 0;\r
+                       }\r
+\r
+                       cl_qlist_insert_tail(\r
+                               &destroy_mc_list, &p_endpt->mac_item.pool_item.list_item );\r
+                       cnt++;\r
+               }\r
+               else\r
+                       p_endpt->is_in_use = FALSE;\r
+       }\r
+       cl_obj_unlock( &p_port->obj );\r
+\r
+       /* Destroy all multicast endpoints now that we have released the lock. */\r
+       while( cl_qlist_count( &destroy_mc_list ) )\r
+       {\r
+               p_endpt = PARENT_STRUCT( cl_qlist_head( &destroy_mc_list ),\r
+                                                                ipoib_endpt_t, mac_item.pool_item.list_item );\r
+               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,\r
+                       ("mcast garbage collector: destroying endpoint %02x:%02x:%02x:%02x:%02x:%02x \n", \r
+                                p_endpt->mac.addr[0],\r
+                                p_endpt->mac.addr[1],\r
+                                p_endpt->mac.addr[2],\r
+                                p_endpt->mac.addr[3],\r
+                                p_endpt->mac.addr[4],\r
+                                p_endpt->mac.addr[5]) );\r
+\r
+               cl_obj_destroy( &PARENT_STRUCT( cl_qlist_remove_head( &destroy_mc_list ),\r
+                       ipoib_endpt_t, mac_item.pool_item.list_item )->obj );\r
+       }\r
+}\r
+\r
+/*\r
+ * DPC routine for the multicast garbage collector.  The deferred context is\r
+ * the ipoib_port_t to collect on; the DPC object and both system arguments\r
+ * are unused.\r
+ */\r
+static void __port_mcast_garbage_dpc(KDPC *p_gc_dpc,void *context,void *s_arg1, void *s_arg2)\r
+{\r
+       UNREFERENCED_PARAMETER(s_arg2);\r
+       UNREFERENCED_PARAMETER(s_arg1);\r
+       UNREFERENCED_PARAMETER(p_gc_dpc);\r
+\r
+       __port_do_mcast_garbage((ipoib_port_t*)context);\r
+}\r
 \r
index b312210..1b7f107 100644 (file)
@@ -509,6 +509,8 @@ typedef struct _ipoib_port
 \r
        atomic32_t                              hdr_idx;\r
        uint16_t                                pkey_index;\r
+       KDPC                                    gc_dpc;\r
+       KTIMER                                  gc_timer;\r
        ipoib_hdr_t                             hdr[1]; /* Must be last! */\r
 \r
 }      ipoib_port_t;\r
index acbcc9a..7cf7b31 100644 (file)
@@ -127,6 +127,12 @@ HKR, Ndi\Params\PayloadMtu,                Default,        0, "2044"
 HKR, Ndi\Params\PayloadMtu,            Min,            0, "60"\r
 HKR, Ndi\Params\PayloadMtu,            Max,            0, "4092"\r
 \r
+HKR, Ndi\Params\MCLeaveRescan,         ParamDesc,      0, "MC leave rescan (sec)"\r
+HKR, Ndi\Params\MCLeaveRescan,         Type,           0, "dword"\r
+HKR, Ndi\Params\MCLeaveRescan,         Default,        0, "260"\r
+HKR, Ndi\Params\MCLeaveRescan,         Optional,       0, "0"\r
+HKR, Ndi\Params\MCLeaveRescan,         Min,            0, "1"\r
+HKR, Ndi\Params\MCLeaveRescan,         Max,            0, "3600"\r
 [IpoibService]\r
 DisplayName     = %IpoibServiceDispName%\r
 ServiceType     = 1 ;%SERVICE_KERNEL_DRIVER%\r