[ipoib] Always connect to mcast groups as a full member. (mlnx: 2845)
[mirror/winof/.git] / ulp / ipoib / kernel / ipoib_port.c
index 60ffb9e..1e0f946 100644 (file)
@@ -1,6 +1,7 @@
 /*\r
  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.\r
  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.\r
+ * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
  *\r
  * This software is available to you under the OpenIB.org BSD license\r
  * below:\r
@@ -449,6 +450,10 @@ static void
 __mcast_cb(\r
        IN                              ib_mcast_rec_t                          *p_mcast_rec );\r
 \r
+void /* prototype only; the body is not shown in this diff */\r
+__leave_error_mcast_cb( /* handed to p_ifc->leave_mcast() by __bcast_cb on its error paths, right after ipoib_port_ref( p_port, ref_leave_mcast ) */\r
+       IN                              void                            *context ); /* NOTE(review): presumably the ipoib_port_t that took the ref_leave_mcast reference -- confirm against the definition */\r
+\r
 \r
 static intn_t\r
 __gid_cmp(\r
@@ -573,7 +578,8 @@ __port_construct(
        __endpt_mgr_construct( p_port );\r
 \r
        KeInitializeEvent( &p_port->sa_event, NotificationEvent, TRUE );\r
-\r
+       KeInitializeEvent( &p_port->leave_mcast_event, NotificationEvent, TRUE );\r
+       \r
        IPOIB_EXIT( IPOIB_DBG_INIT );\r
 }\r
 \r
@@ -596,7 +602,7 @@ __port_init(
        if( cl_status != CL_SUCCESS )\r
        {\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
-                       ("cl_spinlock_init returned %s\n", cl_status_text[cl_status]) );\r
+                       ("cl_spinlock_init returned %#x\n", cl_status) );\r
                return IB_ERROR;\r
        }\r
 \r
@@ -604,7 +610,7 @@ __port_init(
        if( cl_status != CL_SUCCESS )\r
        {\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
-                       ("cl_spinlock_init returned %s\n", cl_status_text[cl_status]) );\r
+                       ("cl_spinlock_init returned %#x\n", cl_status) );\r
                return IB_ERROR;\r
        }\r
 \r
@@ -661,7 +667,7 @@ __port_init(
        if( cl_status != CL_SUCCESS )\r
        {\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
-                       ("cl_obj_init returned %s\n", cl_status_text[cl_status]) );\r
+                       ("cl_obj_init returned %#x\n", cl_status) );\r
                return IB_ERROR;\r
        }\r
 \r
@@ -669,7 +675,7 @@ __port_init(
        if( cl_status != CL_SUCCESS )\r
        {\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
-                       ("cl_obj_insert_rel returned %s\n", cl_status_text[cl_status]) );\r
+                       ("cl_obj_insert_rel returned %#x\n", cl_status) );\r
                cl_obj_destroy( &p_port->obj );\r
                return IB_ERROR;\r
        }\r
@@ -788,6 +794,8 @@ __ib_mgr_init(
        uint64_t                        vaddr;\r
        net32_t                         rkey;\r
        ib_qp_attr_t            qp_attr;\r
+       ib_ca_attr_t            *ca_attr;\r
+       uint32_t                        ca_size;\r
 \r
        IPOIB_ENTER( IPOIB_DBG_INIT );\r
 \r
@@ -805,6 +813,52 @@ __ib_mgr_init(
                return status;\r
        }\r
 \r
+       /* Query the CA for Pkey table */\r
+       status = p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca, NULL, &ca_size);\r
+       if(status != IB_INSUFFICIENT_MEMORY)\r
+       {\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                       ("ib_query_ca failed\n"));\r
+                       return status;\r
+       }\r
+\r
+       ca_attr = (ib_ca_attr_t*)cl_zalloc(ca_size);\r
+       if      (!ca_attr)\r
+       {\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                       ("cl_zalloc can't allocate %d\n",ca_size));\r
+               return IB_INSUFFICIENT_RESOURCES;\r
+       }\r
+\r
+       status = p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca, ca_attr,&ca_size);     \r
+       if( status != IB_SUCCESS )\r
+       {\r
+               cl_free(ca_attr);\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                       ("ib_query_ca returned %s\n", \r
+                       p_port->p_adapter->p_ifc->get_err_str( status )) );\r
+               return status;\r
+       }\r
+       if( ca_attr->p_port_attr->link_state == IB_LINK_ACTIVE)\r
+       {\r
+               uint16_t index;\r
+               CL_ASSERT(ca_attr->p_port_attr->p_pkey_table[0] == IB_DEFAULT_PKEY);\r
+               for(index = 0; index < ca_attr->p_port_attr->num_pkeys; index++)\r
+               {\r
+                       if(p_port->p_adapter->guids.port_guid.pkey == ca_attr->p_port_attr->p_pkey_table[index])\r
+                               break;\r
+               }\r
+               if(index >= ca_attr->p_port_attr->num_pkeys)\r
+               {\r
+                       IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
+                       ("Pkey table is invalid, index not found\n"));\r
+                   return IB_NOT_FOUND;\r
+               }\r
+               p_port->pkey_index = index;\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_IB,\r
+                       ("for PKEY = 0x%04X got index = %d\n",p_port->p_adapter->guids.port_guid.pkey,index));\r
+       }\r
+       cl_free(ca_attr);\r
        /* Allocate the PD. */\r
        status = p_port->p_adapter->p_ifc->alloc_pd(\r
                p_port->ib_mgr.h_ca, IB_PDT_UD, p_port, &p_port->ib_mgr.h_pd );\r
@@ -993,8 +1047,8 @@ __buf_mgr_init(
                NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,\r
                        EVENT_IPOIB_RECV_POOL, 1, cl_status );\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
-                       ("cl_qpool_init for recvs returned %s\n",\r
-                       cl_status_text[cl_status]) );\r
+                       ("cl_qpool_init for recvs returned %#x\n",\r
+                       cl_status) );\r
                return IB_INSUFFICIENT_MEMORY;\r
        }\r
 \r
@@ -1241,9 +1295,9 @@ __buf_mgr_put_recv_list(
        IN                              ipoib_port_t* const                     p_port,\r
        IN                              cl_qlist_t* const                       p_list )\r
 {\r
-       IPOIB_ENTER(  IPOIB_DBG_RECV );\r
+       //IPOIB_ENTER(  IPOIB_DBG_RECV );\r
        cl_qpool_put_list( &p_port->buf_mgr.recv_pool, p_list );\r
-       IPOIB_EXIT(  IPOIB_DBG_RECV );\r
+       //IPOIB_EXIT(  IPOIB_DBG_RECV );\r
 }\r
 \r
 \r
@@ -1388,7 +1442,7 @@ __recv_mgr_repost(
                if( !p_next )\r
                {\r
                        IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,\r
-                               ("Out of receive descriptors!\n") );\r
+                               ("Out of receive descriptors! recv queue depath 0x%x\n",p_port->recv_mgr.depth) );\r
                        break;\r
                }\r
 \r
@@ -1770,6 +1824,7 @@ __recv_get_endpts(
                        status = __endpt_mgr_insert( p_port, mac, *pp_src );\r
                        if( status != IB_SUCCESS )\r
                        {\r
+                               cl_obj_unlock( &p_port->obj );\r
                                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
                                        ("__endpt_mgr_insert returned %s\n",\r
                                        p_port->p_adapter->p_ifc->get_err_str( status )) );\r
@@ -1848,7 +1903,7 @@ __recv_mgr_filter(
 \r
        for( p_wc = p_done_wc_list; p_wc; p_wc = p_wc->p_next )\r
        {\r
-               CL_ASSERT( p_wc->wc_type == IB_WC_RECV );\r
+               CL_ASSERT( p_wc->status != IB_WCS_SUCCESS || p_wc->wc_type == IB_WC_RECV );\r
                p_desc = (ipoib_recv_desc_t*)(uintn_t)p_wc->wr_id;\r
                recv_cnt++;\r
 \r
@@ -1899,6 +1954,7 @@ __recv_mgr_filter(
                        \r
                }\r
                /* Successful completion.  Get the receive information. */\r
+               p_desc->ndis_csum.Value = (ULONG) p_wc->csum_ok;\r
                cl_perf_start( GetRecvEndpts );\r
                __recv_get_endpts( p_port, p_desc, p_wc, &p_src, &p_dst );\r
                cl_perf_stop( &p_port->p_adapter->perf, GetRecvEndpts );\r
@@ -1973,6 +2029,11 @@ __recv_mgr_filter(
                                        status = IB_INVALID_SETTING;\r
                                        break;\r
                                }\r
+                               if ((p_ipoib->type.ip.hdr.ver_hl & 0x0f) != 5 ) {\r
+                                       // If there are IP options in this message, we are in trouble in any case\r
+                                       status = IB_INVALID_SETTING;\r
+                                       break;                                  \r
+                               }\r
                                /* UDP packet with BOOTP ports in src/dst port numbers. */\r
                                cl_perf_start( RecvDhcp );\r
                                status = __recv_dhcp( p_port, p_ipoib, p_eth, p_src, p_dst );\r
@@ -2128,7 +2189,7 @@ __recv_dhcp(
        }\r
 \r
        p_option = &p_dhcp->options[4];\r
-       while( *p_option != DHCP_OPT_END )\r
+       while( *p_option != DHCP_OPT_END && p_option < &p_dhcp->options[312] )\r
        {\r
                switch( *p_option )\r
                {\r
@@ -2351,6 +2412,7 @@ __recv_arp(
                status = __endpt_mgr_insert( p_port, mac, *pp_src );\r
                if( status != IB_SUCCESS )\r
                {\r
+                       cl_obj_unlock( &p_port->obj );\r
                        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
                                ("__endpt_mgr_insert return %s \n",\r
                                p_port->p_adapter->p_ifc->get_err_str( status )) );\r
@@ -2440,7 +2502,6 @@ __recv_mgr_prepare_pkt(
        NDIS_STATUS                                                     status;\r
        uint32_t                                                        pkt_filter;\r
        ip_stat_sel_t                                           type;\r
-       NDIS_TCP_IP_CHECKSUM_PACKET_INFO        chksum;\r
        PERF_DECLARE( GetNdisPkt );\r
 \r
        IPOIB_ENTER( IPOIB_DBG_RECV );\r
@@ -2521,14 +2582,9 @@ __recv_mgr_prepare_pkt(
                return IB_INSUFFICIENT_RESOURCES;\r
        }\r
 \r
-       /* Flag the checksums as having been calculated. */\r
-       chksum.Value = 0;\r
-       chksum.Receive.NdisPacketTcpChecksumSucceeded = TRUE;\r
-       chksum.Receive.NdisPacketUdpChecksumSucceeded = TRUE;\r
-       chksum.Receive.NdisPacketIpChecksumSucceeded = TRUE;\r
-       NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, TcpIpChecksumPacketInfo ) =\r
-               (void*)(uintn_t)chksum.Value;\r
-\r
+       /* Get the checksums directly from packet information. */\r
+       NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, TcpIpChecksumPacketInfo ) = \r
+               (PVOID) (uintn_t) (p_desc->ndis_csum.Value);\r
        ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len );\r
 \r
        IPOIB_EXIT( IPOIB_DBG_RECV );\r
@@ -2620,15 +2676,14 @@ __send_mgr_construct(
 }\r
 \r
 \r
-static void\r
-__send_mgr_destroy(\r
+static void \r
+__pending_list_destroy(\r
        IN                              ipoib_port_t* const                     p_port )\r
 {\r
        cl_list_item_t  *p_item;\r
        NDIS_PACKET             *p_packet;\r
-\r
-       IPOIB_ENTER( IPOIB_DBG_SEND );\r
-\r
+       \r
+       cl_spinlock_acquire( &p_port->send_lock );\r
        /* Complete any pending packets. */\r
        for( p_item = cl_qlist_remove_head( &p_port->send_mgr.pending_list );\r
                p_item != cl_qlist_end( &p_port->send_mgr.pending_list );\r
@@ -2638,6 +2693,15 @@ __send_mgr_destroy(
                NdisMSendComplete( p_port->p_adapter->h_adapter, p_packet,\r
                        NDIS_STATUS_RESET_IN_PROGRESS );\r
        }\r
+       cl_spinlock_release( &p_port->send_lock );\r
+}\r
+\r
+static void /* Send-manager teardown: delegates to __pending_list_destroy() */\r
+__send_mgr_destroy(\r
+       IN                              ipoib_port_t* const                     p_port )\r
+{\r
+       IPOIB_ENTER( IPOIB_DBG_SEND );\r
+       __pending_list_destroy(p_port); /* completes every packet left on send_mgr.pending_list with NDIS_STATUS_RESET_IN_PROGRESS, under send_lock */\r
 \r
        IPOIB_EXIT( IPOIB_DBG_SEND );\r
 }\r
@@ -3148,6 +3212,19 @@ __send_mgr_filter_udp(
 }\r
 \r
 \r
+unsigned short ipchksum(unsigned short *ip, int len) /* one's-complement checksum of the 16-bit words at ip; len is a byte count */\r
+{ /* the visible caller (__send_mgr_filter_dhcp) zeroes ip.hdr.chksum first and passes sizeof(ip_hdr_t) */\r
+    unsigned long sum = 0;\r
+\r
+    len >>= 1; /* bytes -> number of 16-bit words; an odd trailing byte is not summed */\r
+    while (len--) {\r
+        sum += *(ip++);\r
+        if (sum > 0xFFFF) /* carry out of the low 16 bits */\r
+            sum -= 0xFFFF; /* end-around carry: same as subtracting 0x10000 and adding 1 */\r
+    }\r
+    return (unsigned short)((~sum) & 0x0000FFFF); /* bitwise complement of the folded sum, truncated to 16 bits */\r
+}\r
+\r
 static NDIS_STATUS\r
 __send_mgr_filter_dhcp(\r
        IN                              ipoib_port_t* const                     p_port,\r
@@ -3199,7 +3276,7 @@ __send_mgr_filter_dhcp(
 \r
        /* Now scan through the options looking for the client identifier. */\r
        p_option = &p_ib_dhcp->options[4];\r
-       while( *p_option != DHCP_OPT_END )\r
+       while( *p_option != DHCP_OPT_END && p_option < &p_ib_dhcp->options[312] )\r
        {\r
                switch( *p_option )\r
                {\r
@@ -3249,7 +3326,7 @@ __send_mgr_filter_dhcp(
                                {\r
                                        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
                                                ("Can't convert CID to IPoIB format.\n") );\r
-                                       return IB_INSUFFICIENT_MEMORY;\r
+                                       return NDIS_STATUS_RESOURCES;\r
                                }\r
                                /* Move the existing options down, and add a new CID option */\r
                                len = p_option - ( p_cid + p_cid[1] + 2 );\r
@@ -3284,17 +3361,26 @@ __send_mgr_filter_dhcp(
                        p_option[0] = DHCP_OPT_END;\r
                        p_cid[0] = DHCP_OPT_CLIENT_ID;\r
                        p_cid[1] = 21;\r
+                       p_cid[2] = DHCP_HW_TYPE_IB;\r
                }\r
 \r
                CL_ASSERT( p_cid[1] == 21 );\r
                p_cid[23]= DHCP_OPT_END;\r
-               ib_gid_set_default( &gid, p_port->p_adapter->guids.port_guid );\r
+               ib_gid_set_default( &gid, p_port->p_adapter->guids.port_guid.guid );\r
                cl_memcpy( &p_cid[7], &gid, sizeof(ib_gid_t) );\r
-               cl_memcpy( &p_cid[3], &p_port->ib_mgr.qpn, sizeof(p_port->ib_mgr.qpn) );\r
-               /* Clear the hardware address. */\r
+               cl_memcpy( &p_cid[3], &p_port->ib_mgr.qpn, sizeof(p_port->ib_mgr.qpn) );                \r
                p_ib_dhcp->htype = DHCP_HW_TYPE_IB;\r
-               p_ib_dhcp->hlen = 0;\r
-               cl_memclr( p_ib_dhcp->chaddr, sizeof(p_ib_dhcp->chaddr) );\r
+\r
+               /* update lengths to include any change we made */\r
+               p_desc->p_buf->ip.hdr.length = cl_ntoh16( sizeof(ip_hdr_t) + sizeof(udp_hdr_t) + sizeof(dhcp_pkt_t) );\r
+               p_desc->p_buf->ip.prot.udp.hdr.length = cl_ntoh16( sizeof(udp_hdr_t) + sizeof(dhcp_pkt_t) );\r
+\r
+               /* update crc in ip header */\r
+               if( !p_port->p_adapter->params.send_chksum_offload )\r
+               {\r
+                       p_desc->p_buf->ip.hdr.chksum = 0;\r
+                       p_desc->p_buf->ip.hdr.chksum = ipchksum((unsigned short*) &p_desc->p_buf->ip.hdr, sizeof(ip_hdr_t));\r
+               }\r
                break;\r
 \r
        /* Server messages. */\r
@@ -3391,7 +3477,7 @@ __send_mgr_filter_arp(
        p_ib_arp->op = p_arp->op;\r
        p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;\r
        ib_gid_set_default( &p_ib_arp->src_hw.gid,\r
-               p_port->p_adapter->guids.port_guid );\r
+               p_port->p_adapter->guids.port_guid.guid );\r
        p_ib_arp->src_ip = p_arp->src_ip;\r
        if( cl_memcmp( &p_arp->dst_hw, &null_hw, sizeof(mac_addr_t) ) )\r
        {\r
@@ -3503,7 +3589,8 @@ __send_mgr_queue(
        if( status == NDIS_STATUS_NO_ROUTE_TO_DESTINATION &&\r
                ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )\r
        {\r
-               if( ipoib_port_join_mcast( p_port, p_eth_hdr->dst ) == IB_SUCCESS )\r
+               if( ipoib_port_join_mcast( p_port, p_eth_hdr->dst, \r
+                       IB_MC_REC_STATE_FULL_MEMBER) == IB_SUCCESS )\r
                {\r
                        IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,\r
                                ("Multicast Mac - trying to join.\n") );\r
@@ -3526,6 +3613,8 @@ __build_send_desc(
 {\r
        NDIS_STATUS                     status;\r
        int32_t                         hdr_idx;\r
+       PNDIS_PACKET_EXTENSION                          PktExt;\r
+       PNDIS_TCP_IP_CHECKSUM_PACKET_INFO       pChecksumPktInfo; //NDIS 5.1\r
 \r
        PERF_DECLARE( SendMgrFilter );\r
 \r
@@ -3560,12 +3649,29 @@ __build_send_desc(
        p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;\r
        p_desc->wr.wr_type = WR_SEND;\r
        p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;\r
+       \r
+       PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(p_desc->p_pkt);\r
+       pChecksumPktInfo = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo];\r
+       if(p_port->p_adapter->params.send_chksum_offload & \r
+               (pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || pChecksumPktInfo->Transmit.NdisPacketChecksumV6))\r
+       {\r
+               // Set transmission checksum offloading \r
+               if (pChecksumPktInfo->Transmit.NdisPacketIpChecksum) \r
+               {\r
+                       p_desc->wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM;\r
+               }\r
+               if(pChecksumPktInfo->Transmit.NdisPacketTcpChecksum  ) \r
+               {\r
+                       p_desc->wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM;\r
+               }\r
+       }\r
+       \r
        p_desc->wr.ds_array = p_desc->local_ds;\r
 \r
        p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;\r
        p_desc->wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;\r
        p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;\r
-       p_desc->wr.dgrm.ud.pkey_index = 0;\r
+       p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;\r
        p_desc->wr.dgrm.ud.rsvd = NULL;\r
 \r
        /* Store context in our reserved area of the packet. */\r
@@ -3627,6 +3733,27 @@ ipoib_port_send(
 \r
        IPOIB_ENTER( IPOIB_DBG_SEND );\r
 \r
+\r
+       cl_obj_lock( &p_port->obj );\r
+       if( p_port->state != IB_QPS_RTS )\r
+       {\r
+               cl_obj_unlock( &p_port->obj );\r
+               for( i = 0; i < num_packets; ++i )\r
+               {\r
+                       ipoib_inc_send_stat( p_port->p_adapter, IP_STAT_DROPPED, 0 );\r
+                       /* Complete the packet. */\r
+                       NdisMSendComplete( p_port->p_adapter->h_adapter,\r
+                               p_packet_array[i], NDIS_STATUS_ADAPTER_NOT_READY );\r
+                       \r
+               }\r
+\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,\r
+                       ("Invalid state - Aborting.\n") );\r
+               return;\r
+       }\r
+       cl_obj_unlock( &p_port->obj );\r
+\r
+       \r
        cl_spinlock_acquire( &p_port->send_lock );\r
        for( i = 0; i < num_packets; i++ )\r
        {\r
@@ -3730,6 +3857,17 @@ ipoib_port_resume(
 \r
        IPOIB_ENTER( IPOIB_DBG_SEND );\r
 \r
+\r
+       cl_obj_lock( &p_port->obj );\r
+       if( p_port->state != IB_QPS_RTS )\r
+       {\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,\r
+                       ("Invalid state - Aborting.\n") );\r
+               cl_obj_unlock( &p_port->obj );\r
+               return;\r
+       }\r
+       cl_obj_unlock( &p_port->obj );\r
+\r
        cl_spinlock_acquire( &p_port->send_lock );\r
 \r
        for( p_item = cl_qlist_head( &p_port->send_mgr.pending_list );\r
@@ -3739,7 +3877,7 @@ ipoib_port_resume(
                /* Check the send queue and pend the request if not empty. */\r
                if( p_port->send_mgr.depth == p_port->p_adapter->params.sq_depth )\r
                {\r
-                       IPOIB_PRINT_EXIT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,\r
+                       IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,\r
                                ("No available WQEs.\n") );\r
                        break;\r
                }\r
@@ -3773,12 +3911,10 @@ ipoib_port_resume(
                {\r
                        ASSERT( status == NDIS_STATUS_NO_ROUTE_TO_DESTINATION );\r
 \r
-                       if( p_eth_hdr->dst.addr[0] == 0x01 &&\r
-                               p_eth_hdr->dst.addr[1] == 0x00 &&\r
-                               p_eth_hdr->dst.addr[2] == 0x5E )\r
+                       if( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )\r
                        {\r
-                               if( ipoib_port_join_mcast( p_port, p_eth_hdr->dst ) ==\r
-                                       IB_SUCCESS )\r
+                               if( ipoib_port_join_mcast( p_port, p_eth_hdr->dst,\r
+                                       IB_MC_REC_STATE_FULL_MEMBER) == IB_SUCCESS )\r
                                {\r
                                        IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,\r
                                                ("Multicast Mac - trying to join.\n") );\r
@@ -3886,7 +4022,7 @@ __send_cb(
                while( p_wc )\r
                {\r
                        cl_perf_start( SendComp );\r
-                       CL_ASSERT( p_wc->wc_type == IB_WC_SEND );\r
+                       CL_ASSERT( p_wc->status != IB_WCS_SUCCESS || p_wc->wc_type == IB_WC_SEND );\r
                        p_packet = (NDIS_PACKET*)(uintn_t)p_wc->wr_id;\r
                        CL_ASSERT( p_packet );\r
                        CL_ASSERT( IPOIB_PORT_FROM_PACKET( p_packet ) == p_port );\r
@@ -4048,8 +4184,10 @@ __endpt_mgr_reset_all(
        IN                              ipoib_port_t* const                     p_port )\r
 {\r
        cl_map_item_t   *p_item;\r
-       ipoib_endpt_t   *p_endpt;\r
-       cl_qlist_t              mc_list;\r
+       ipoib_endpt_t           *p_endpt;\r
+       cl_qlist_t                      mc_list;\r
+       uint32_t                        local_exist = 0;\r
+\r
 \r
        IPOIB_ENTER( IPOIB_DBG_ENDPT );\r
 \r
@@ -4071,6 +4209,7 @@ __endpt_mgr_reset_all(
                \r
                cl_qlist_insert_head(\r
                        &mc_list, &p_port->p_local_endpt->mac_item.pool_item.list_item );\r
+               local_exist = 1;\r
 \r
                p_port->p_local_endpt = NULL;\r
        }\r
@@ -4109,8 +4248,22 @@ __endpt_mgr_reset_all(
                }\r
                \r
        }\r
+\r
        cl_obj_unlock( &p_port->obj );\r
 \r
+\r
+       if(cl_qlist_count( &mc_list ) - local_exist)\r
+       {\r
+               p_port->mcast_cnt =  (uint32_t)cl_qlist_count( &mc_list ) - local_exist;\r
+       }\r
+       else\r
+       {\r
+               p_port->mcast_cnt = 0;\r
+               KeSetEvent( &p_port->leave_mcast_event, EVENT_INCREMENT, FALSE );\r
+       }       \r
+\r
+       IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,("p_port->mcast_cnt = %d\n", p_port->mcast_cnt - local_exist));\r
+\r
        /* Destroy all multicast endpoints now that we have released the lock. */\r
        while( cl_qlist_count( &mc_list ) )\r
        {\r
@@ -4617,7 +4770,7 @@ ipoib_port_up(
        cl_memclr( &query, sizeof(ib_query_req_t) );\r
        query.query_type = IB_QUERY_USER_DEFINED;\r
        query.p_query_input = &info;\r
-       query.port_guid = p_port->p_adapter->guids.port_guid;\r
+       query.port_guid = p_port->p_adapter->guids.port_guid.guid;\r
        query.timeout_ms = p_port->p_adapter->params.sa_timeout;\r
        query.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;\r
        query.query_context = p_port;\r
@@ -4655,7 +4808,7 @@ __endpt_mgr_add_local(
 \r
        IPOIB_ENTER( IPOIB_DBG_INIT );\r
 \r
-       ib_gid_set_default( &gid, p_port->p_adapter->guids.port_guid );\r
+       ib_gid_set_default( &gid, p_port->p_adapter->guids.port_guid.guid );\r
        p_endpt = ipoib_endpt_create(\r
                &gid, p_port_info->base_lid, p_port->ib_mgr.qpn );\r
        if( !p_endpt )\r
@@ -4712,7 +4865,7 @@ __port_info_cb(
 \r
        IPOIB_ENTER( IPOIB_DBG_INIT );\r
 \r
-       p_port = (ipoib_port_t* __ptr64)p_query_rec->query_context;\r
+       p_port = (ipoib_port_t*)p_query_rec->query_context;\r
 \r
        cl_obj_lock( &p_port->obj );\r
        p_port->ib_mgr.h_query = NULL;\r
@@ -4828,10 +4981,13 @@ __port_get_bcast(
        cl_memclr( &member_rec, sizeof(ib_member_rec_t) );\r
        member_rec.mgid = bcast_mgid_template;\r
 \r
+    member_rec.mgid.raw[4] = (uint8_t) (p_port->p_adapter->guids.port_guid.pkey >> 8) ;\r
+       member_rec.mgid.raw[5] = (uint8_t) p_port->p_adapter->guids.port_guid.pkey;\r
+       member_rec.pkey = p_port->p_adapter->guids.port_guid.pkey;\r
        cl_memclr( &query, sizeof(ib_query_req_t) );\r
        query.query_type = IB_QUERY_USER_DEFINED;\r
        query.p_query_input = &info;\r
-       query.port_guid = p_port->p_adapter->guids.port_guid;\r
+       query.port_guid = p_port->p_adapter->guids.port_guid.guid;\r
        query.timeout_ms = p_port->p_adapter->params.sa_timeout;\r
        query.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;\r
        query.query_context = p_port;\r
@@ -4866,7 +5022,7 @@ __bcast_get_cb(
 \r
        IPOIB_ENTER( IPOIB_DBG_INIT );\r
 \r
-       p_port = (ipoib_port_t* __ptr64)p_query_rec->query_context;\r
+       p_port = (ipoib_port_t*)p_query_rec->query_context;\r
 \r
        cl_obj_lock( &p_port->obj );\r
        p_port->ib_mgr.h_query = NULL;\r
@@ -4965,14 +5121,14 @@ __port_join_bcast(
        /* We specify our port GID for the join operation. */\r
        mcast_req.member_rec.port_gid.unicast.prefix = IB_DEFAULT_SUBNET_PREFIX;\r
        mcast_req.member_rec.port_gid.unicast.interface_id =\r
-               p_port->p_adapter->guids.port_guid;\r
+               p_port->p_adapter->guids.port_guid.guid;\r
 \r
        mcast_req.mcast_context = p_port;\r
        mcast_req.pfn_mcast_cb = __bcast_cb;\r
        mcast_req.timeout_ms = p_port->p_adapter->params.sa_timeout;\r
        mcast_req.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;\r
-       mcast_req.port_guid = p_port->p_adapter->guids.port_guid;\r
-       mcast_req.pkey_index = 0;\r
+       mcast_req.port_guid = p_port->p_adapter->guids.port_guid.guid;\r
+       mcast_req.pkey_index = p_port->pkey_index;\r
 \r
        if( ib_member_get_state( mcast_req.member_rec.scope_state ) !=\r
                IB_MC_REC_STATE_FULL_MEMBER )\r
@@ -5019,8 +5175,10 @@ __port_create_bcast(
         * We specify the MGID since we don't want the SA to generate it for us.\r
         */\r
        mcast_req.member_rec.mgid = bcast_mgid_template;\r
+       mcast_req.member_rec.mgid.raw[4] = (uint8_t) (p_port->p_adapter->guids.port_guid.pkey >> 8); \r
+       mcast_req.member_rec.mgid.raw[5] = (uint8_t) p_port->p_adapter->guids.port_guid.pkey;\r
        ib_gid_set_default( &mcast_req.member_rec.port_gid,\r
-               p_port->p_adapter->guids.port_guid );\r
+               p_port->p_adapter->guids.port_guid.guid );\r
        /*\r
         * IPOIB spec requires that the QKEY have the MSb set so that the QKEY\r
         * from the QP is used rather than the QKEY in the send WR.\r
@@ -5028,9 +5186,9 @@ __port_create_bcast(
        mcast_req.member_rec.qkey =\r
                (uint32_t)(uintn_t)p_port | IB_QP_PRIVILEGED_Q_KEY;\r
        mcast_req.member_rec.mtu =\r
-               (IB_PATH_SELECTOR_EXACTLY << 6) | IB_MTU_2048;\r
+               (IB_PATH_SELECTOR_EXACTLY << 6) | IB_MTU_LEN_2048;\r
 \r
-       mcast_req.member_rec.pkey = IB_DEFAULT_PKEY;\r
+       mcast_req.member_rec.pkey = p_port->p_adapter->guids.port_guid.pkey;\r
 \r
        mcast_req.member_rec.sl_flow_hop = ib_member_set_sl_flow_hop( 0, 0, 0 );\r
        mcast_req.member_rec.scope_state =\r
@@ -5040,8 +5198,8 @@ __port_create_bcast(
        mcast_req.pfn_mcast_cb = __bcast_cb;\r
        mcast_req.timeout_ms = p_port->p_adapter->params.sa_timeout;\r
        mcast_req.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;\r
-       mcast_req.port_guid = p_port->p_adapter->guids.port_guid;\r
-       mcast_req.pkey_index = 0;\r
+       mcast_req.port_guid = p_port->p_adapter->guids.port_guid.guid;\r
+       mcast_req.pkey_index = p_port->pkey_index;\r
 \r
        /* reference the object for the multicast join request. */\r
        ipoib_port_ref( p_port, ref_join_bcast );\r
@@ -5110,9 +5268,16 @@ ipoib_port_down(
                return;\r
        }\r
 \r
+       KeResetEvent(&p_port->leave_mcast_event);\r
+\r
        /* Reset all endpoints so we don't flush our ARP cache. */\r
        __endpt_mgr_reset_all( p_port );\r
 \r
+       KeWaitForSingleObject(\r
+               &p_port->leave_mcast_event, Executive, KernelMode, FALSE, NULL );\r
+\r
+       __pending_list_destroy(p_port);\r
+       \r
        cl_obj_lock( &p_port->p_adapter->obj );\r
        ipoib_dereg_addrs( p_port->p_adapter );\r
        cl_obj_unlock( &p_port->p_adapter->obj );\r
@@ -5130,15 +5295,18 @@ __bcast_cb(
 \r
        IPOIB_ENTER( IPOIB_DBG_INIT );\r
 \r
-       p_port = (ipoib_port_t* __ptr64)p_mcast_rec->mcast_context;\r
+       p_port = (ipoib_port_t*)p_mcast_rec->mcast_context;\r
 \r
        cl_obj_lock( &p_port->obj );\r
        if( p_port->state != IB_QPS_INIT )\r
        {\r
                cl_obj_unlock( &p_port->obj );\r
                if( p_mcast_rec->status == IB_SUCCESS )\r
-                       p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, NULL );\r
 \r
+               {\r
+                       ipoib_port_ref(p_port, ref_leave_mcast);\r
+                       p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, __leave_error_mcast_cb );\r
+               }\r
                KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );\r
                ipoib_port_deref( p_port, ref_bcast_inv_state );\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
@@ -5198,7 +5366,8 @@ __bcast_cb(
                IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,\r
                        ("__endpt_mgr_add_bcast returned %s\n",\r
                        p_port->p_adapter->p_ifc->get_err_str( status )) );\r
-               status = p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, NULL );\r
+               ipoib_port_ref(p_port, ref_leave_mcast);\r
+               status = p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, __leave_error_mcast_cb );\r
                CL_ASSERT( status == IB_SUCCESS );\r
                goto err;\r
        }\r
@@ -5246,7 +5415,7 @@ __qp_event(
 {\r
        UNUSED_PARAM( p_event_rec );\r
        CL_ASSERT( p_event_rec->context );\r
-       ((ipoib_port_t* __ptr64)p_event_rec->context)->p_adapter->hung = TRUE;\r
+       ((ipoib_port_t*)p_event_rec->context)->p_adapter->hung = TRUE;\r
 }\r
 \r
 \r
@@ -5256,7 +5425,7 @@ __cq_event(
 {\r
        UNUSED_PARAM( p_event_rec );\r
        CL_ASSERT( p_event_rec->context );\r
-       ((ipoib_port_t* __ptr64)p_event_rec->context)->p_adapter->hung = TRUE;\r
+       ((ipoib_port_t*)p_event_rec->context)->p_adapter->hung = TRUE;\r
 }\r
 \r
 \r
@@ -5285,9 +5454,9 @@ __ib_mgr_activate(
        }\r
 \r
        /* Move the QP to RTS. */\r
-       dgrm_info.port_guid = p_port->p_adapter->guids.port_guid;\r
+       dgrm_info.port_guid = p_port->p_adapter->guids.port_guid.guid;\r
        dgrm_info.qkey = p_port->ib_mgr.bcast_rec.qkey;\r
-       dgrm_info.pkey_index = 0;\r
+       dgrm_info.pkey_index = p_port->pkey_index;\r
        status = p_port->p_adapter->p_ifc->init_dgrm_svc( p_port->ib_mgr.h_qp, &dgrm_info );\r
        if( status != IB_SUCCESS )\r
        {\r
@@ -5324,7 +5493,8 @@ __ib_mgr_activate(
 ib_api_status_t\r
 ipoib_port_join_mcast(\r
        IN                              ipoib_port_t* const                     p_port,\r
-       IN              const   mac_addr_t                                      mac )\r
+       IN              const   mac_addr_t                              mac,\r
+       IN              const   uint8_t                                 state)\r
 {\r
        ib_api_status_t         status;\r
        ib_mcast_req_t          mcast_req;\r
@@ -5357,8 +5527,7 @@ ipoib_port_join_mcast(
        mcast_req.member_rec = p_port->ib_mgr.bcast_rec;\r
        /* Clear fields that aren't specified in the join */\r
        mcast_req.member_rec.mlid = 0;\r
-       mcast_req.member_rec.mtu = 0;\r
-       mcast_req.member_rec.rate = 0;  \r
+       ib_member_set_state( &mcast_req.member_rec.scope_state,state);\r
 \r
        if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] == 0x5E )\r
        {\r
@@ -5387,9 +5556,9 @@ ipoib_port_join_mcast(
        mcast_req.pfn_mcast_cb = __mcast_cb;\r
        mcast_req.timeout_ms = p_port->p_adapter->params.sa_timeout;\r
        mcast_req.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;\r
-       mcast_req.port_guid = p_port->p_adapter->guids.port_guid;\r
-       mcast_req.pkey_index = 0;\r
-\r
+       mcast_req.port_guid = p_port->p_adapter->guids.port_guid.guid;\r
+       mcast_req.pkey_index = p_port->pkey_index;\r
+       mcast_req.member_rec.pkey = p_port->p_adapter->guids.port_guid.pkey;\r
        /*\r
         * Create the endpoint and insert it in the port.  Since we don't wait for\r
         * the mcast SA operations to complete before returning from the multicast\r
@@ -5445,15 +5614,18 @@ __mcast_cb(
 \r
        IPOIB_ENTER( IPOIB_DBG_MCAST );\r
 \r
-       p_port = (ipoib_port_t* __ptr64)p_mcast_rec->mcast_context;\r
+       p_port = (ipoib_port_t*)p_mcast_rec->mcast_context;\r
 \r
        cl_obj_lock( &p_port->obj );\r
        if( p_port->state != IB_QPS_RTS )\r
        {\r
                cl_obj_unlock( &p_port->obj );\r
                if( p_mcast_rec->status == IB_SUCCESS )\r
-                       p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, NULL );\r
 \r
+               {\r
+                       ipoib_port_ref(p_port, ref_leave_mcast);\r
+                       p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, __leave_error_mcast_cb );\r
+               }\r
                ipoib_port_deref( p_port, ref_mcast_inv_state );\r
                IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
                        ("Invalid state - Aborting.\n") );\r
@@ -5486,7 +5658,9 @@ __mcast_cb(
                cl_obj_unlock( &p_port->obj );\r
                IPOIB_PRINT(TRACE_LEVEL_WARNING, IPOIB_DBG_ERROR,\r
                        ("Failed to find endpoint for update.\n") );\r
-               p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, NULL );\r
+\r
+               ipoib_port_ref(p_port, ref_leave_mcast);\r
+               p_port->p_adapter->p_ifc->leave_mcast( p_mcast_rec->h_mcast, __leave_error_mcast_cb );\r
                ipoib_port_deref( p_port, ref_mcast_no_endpt );\r
                IPOIB_EXIT( IPOIB_DBG_MCAST );\r
                return;\r
@@ -5530,3 +5704,53 @@ __mcast_cb(
 \r
        IPOIB_EXIT( IPOIB_DBG_MCAST );\r
 }\r
+\r
+\r
+/*\r
+ * Multicast leave-completion callback.  Decrements the port's mcast_cnt,\r
+ * signals leave_mcast_event when it hits zero, then drops ref_leave_mcast.\r
+ */\r
+void\r
+ipoib_leave_mcast_cb(\r
+       IN                              void                            *context )\r
+{\r
+       ipoib_port_t            *p_port = (ipoib_port_t*)context;\r
+\r
+       IPOIB_ENTER( IPOIB_DBG_MCAST );\r
+       IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_MCAST,("p_port->mcast_cnt = %d\n", p_port->mcast_cnt));\r
+\r
+       /* Test the decrement's own result; re-reading the counter could race. */\r
+       if( cl_atomic_dec( &p_port->mcast_cnt ) == 0 )\r
+       {\r
+               KeSetEvent( &p_port->leave_mcast_event, EVENT_INCREMENT, FALSE );\r
+       }\r
+\r
+       /* Drop the reference last: the port may go away once it is released. */\r
+       ipoib_port_deref( p_port, ref_leave_mcast );\r
+       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,\r
+                       ("Leave mcast callback deref ipoib_port \n") );\r
+       IPOIB_EXIT( IPOIB_DBG_MCAST );\r
+}\r
+\r
+\r
+\r
+/*\r
+ * Leave-completion callback used on the error/abort paths.  It only releases\r
+ * the ref_leave_mcast reference that was taken before leave_mcast was called.\r
+ */\r
+void\r
+__leave_error_mcast_cb(\r
+       IN                              void                            *context )\r
+{\r
+       ipoib_port_t* const     p_leaving_port = (ipoib_port_t*)context;\r
+\r
+       IPOIB_ENTER( IPOIB_DBG_MCAST );\r
+       ipoib_port_deref( p_leaving_port, ref_leave_mcast);\r
+       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,\r
+                       ("Leave mcast callback deref ipoib_port \n") );\r
+       IPOIB_EXIT( IPOIB_DBG_MCAST );\r
+}\r
+\r
+\r
+\r
+\r