[infiniband] Provide a general mechanism for multicast group joins
author Michael Brown <mcb30@etherboot.org>
Tue, 7 Jul 2009 15:07:31 +0000 (16:07 +0100)
committer Michael Brown <mcb30@etherboot.org>
Fri, 17 Jul 2009 22:06:34 +0000 (23:06 +0100)
Generalise out the multicast group membership record code from IPoIB.

src/drivers/net/ipoib.c
src/include/gpxe/errfile.h
src/include/gpxe/ib_mad.h
src/include/gpxe/ib_mcast.h [new file with mode: 0644]
src/net/infiniband.c
src/net/infiniband/ib_mcast.c [new file with mode: 0644]

index 4b9f1e0..36df342 100644 (file)
@@ -30,6 +30,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
 #include <gpxe/infiniband.h>
 #include <gpxe/ib_qset.h>
 #include <gpxe/ib_pathrec.h>
+#include <gpxe/ib_mcast.h>
 #include <gpxe/ipoib.h>
 
 /** @file
@@ -67,20 +68,14 @@ struct ipoib_device {
        struct ib_queue_set meta;
        /** Broadcast MAC */
        struct ipoib_mac broadcast;
-       /** Attached to multicast group
+       /** Joined to multicast group
         *
-        * This flag indicates whether or not we have attached our
-        * data queue pair to the broadcast multicast GID.
+        * This flag indicates whether or not we have initiated the
+        * join to the IPv4 broadcast multicast group.
         */
-       int broadcast_attached;
+       int broadcast_joined;
 };
 
-/** TID half used to identify multicast member record replies */
-#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
-
-/** IPoIB metadata TID */
-static uint32_t ipoib_meta_tid = 0;
-
 /** Broadcast IPoIB address */
 static struct ipoib_mac ipoib_broadcast = {
        .qpn = htonl ( IB_QPN_BROADCAST ),
@@ -332,67 +327,6 @@ struct net_device * alloc_ipoibdev ( size_t priv_size ) {
  ****************************************************************************
  */
 
-/**
- * Transmit multicast group membership request
- *
- * @v ipoib            IPoIB device
- * @v gid              Multicast GID
- * @v join             Join (rather than leave) group
- * @ret rc             Return status code
- */
-static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
-                                   struct ib_gid *gid, int join ) {
-       struct ib_device *ibdev = ipoib->ibdev;
-       struct io_buffer *iobuf;
-       struct ib_mad_sa *sa;
-       struct ib_address_vector av;
-       int rc;
-
-       /* Allocate I/O buffer */
-       iobuf = alloc_iob ( sizeof ( *sa ) );
-       if ( ! iobuf )
-               return -ENOMEM;
-       iob_put ( iobuf, sizeof ( *sa ) );
-       sa = iobuf->data;
-       memset ( sa, 0, sizeof ( *sa ) );
-
-       /* Construct path record request */
-       sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
-       sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
-       sa->mad_hdr.class_version = 2;
-       sa->mad_hdr.method =
-               ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
-       sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
-       sa->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
-       sa->mad_hdr.tid[1] = ipoib_meta_tid++;
-       sa->sa_hdr.comp_mask[1] =
-               htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
-                       IB_SA_MCMEMBER_REC_JOIN_STATE );
-       sa->sa_data.mc_member_record.scope__join_state = 1;
-       memcpy ( &sa->sa_data.mc_member_record.mgid, gid,
-                sizeof ( sa->sa_data.mc_member_record.mgid ) );
-       memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid,
-                sizeof ( sa->sa_data.mc_member_record.port_gid ) );
-
-       /* Construct address vector */
-       memset ( &av, 0, sizeof ( av ) );
-       av.lid = ibdev->sm_lid;
-       av.sl = ibdev->sm_sl;
-       av.qpn = IB_QPN_GMA;
-       av.qkey = IB_QKEY_GMA;
-
-       /* Post send request */
-       if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
-                                  iobuf ) ) != 0 ) {
-               DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
-                      ipoib, strerror ( rc ) );
-               free_iob ( iobuf );
-               return rc;
-       }
-
-       return 0;
-}
-
 /**
  * Transmit packet via IPoIB network device
  *
@@ -529,33 +463,6 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
        free_iob ( iobuf );
 }
 
-/**
- * Handle received IPoIB multicast membership record
- *
- * @v ipoib            IPoIB device
- * @v mc_member_record Multicast membership record
- */
-static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
-                              struct ib_mc_member_record *mc_member_record ) {
-       unsigned long data_qkey;
-       int joined;
-       int rc;
-
-       /* Record parameters */
-       joined = ( mc_member_record->scope__join_state & 0x0f );
-       data_qkey = ntohl ( mc_member_record->qkey );
-       DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx\n",
-              ipoib, ( joined ? "joined" : "left" ), data_qkey );
-
-       /* Update data queue pair qkey */
-       if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
-                                  IB_MODIFY_QKEY, data_qkey ) ) != 0 ){
-               DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
-                      ipoib, strerror ( rc ) );
-               return;
-       }
-}
-
 /**
  * Handle IPoIB metadata receive completion
  *
@@ -594,10 +501,6 @@ ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
        }
 
        switch ( sa->mad_hdr.tid[0] ) {
-       case IPOIB_TID_MC_MEMBER_REC:
-               ipoib_recv_mc_member_record ( ipoib,
-                                             &sa->sa_data.mc_member_record );
-               break;
        default:
                DBGC ( ipoib, "IPoIB %p unwanted response:\n",
                       ipoib );
@@ -647,31 +550,13 @@ static void ipoib_irq ( struct net_device *netdev __unused,
 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
        int rc;
 
-       /* Sanity check */
-       if ( ! ipoib->data.qp )
-               return 0;
-
-       /* Attach data queue to broadcast multicast GID */
-       assert ( ipoib->broadcast_attached == 0 );
-       if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
-                                     &ipoib->broadcast.gid ) ) != 0 ){
-               DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
-                      "%s\n", ipoib, strerror ( rc ) );
-               return rc;
-       }
-       ipoib->broadcast_attached = 1;
-
-       /* Initiate broadcast group join */
-       if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast.gid,
-                                            1 ) ) != 0 ) {
-               DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
+       if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->data.qp,
+                                   &ipoib->broadcast.gid ) ) != 0 ) {
+               DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
                       ipoib, strerror ( rc ) );
                return rc;
        }
-
-       /* We will set link up on the network device when we receive
-        * the broadcast join response.
-        */
+       ipoib->broadcast_joined = 1;
 
        return 0;
 }
@@ -684,11 +569,10 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
 
        /* Detach data queue from broadcast multicast GID */
-       if ( ipoib->broadcast_attached ) {
-               assert ( ipoib->data.qp != NULL );
-               ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
-                                 &ipoib->broadcast.gid );
-               ipoib->broadcast_attached = 0;
+       if ( ipoib->broadcast_joined ) {
+               ib_mcast_leave ( ipoib->ibdev, ipoib->data.qp,
+                                &ipoib->broadcast.gid );
+               ipoib->broadcast_joined = 0;
        }
 }
 
index 0eac0a8..5e7fa09 100644 (file)
@@ -146,6 +146,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
 #define ERRFILE_ib_qset                        ( ERRFILE_NET | 0x001a0000 )
 #define ERRFILE_ib_gma                 ( ERRFILE_NET | 0x001b0000 )
 #define ERRFILE_ib_pathrec             ( ERRFILE_NET | 0x001c0000 )
+#define ERRFILE_ib_mcast               ( ERRFILE_NET | 0x001d0000 )
 
 #define ERRFILE_image                ( ERRFILE_IMAGE | 0x00000000 )
 #define ERRFILE_elf                  ( ERRFILE_IMAGE | 0x00010000 )
index eaea12b..d4582c3 100644 (file)
@@ -203,6 +203,8 @@ struct ib_smp_class_specific {
 
 #define IB_SA_CLASS_VERSION                    2
 
+#define IB_SA_METHOD_DELETE_RESP               0x95
+
 struct ib_rmpp_hdr {
        uint32_t raw[3];
 } __attribute__ (( packed ));
diff --git a/src/include/gpxe/ib_mcast.h b/src/include/gpxe/ib_mcast.h
new file mode 100644 (file)
index 0000000..2ca3382
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef _GPXE_IB_MCAST_H
+#define _GPXE_IB_MCAST_H
+
+/** @file
+ *
+ * Infiniband multicast groups
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <gpxe/infiniband.h>
+
+extern int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+                          struct ib_gid *gid );
+extern void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+                            struct ib_gid *gid );
+
+#endif /* _GPXE_IB_MCAST_H */
index 369d490..e71b3bc 100644 (file)
@@ -554,6 +554,10 @@ void ib_close ( struct ib_device *ibdev ) {
  * @v qp               Queue pair
  * @v gid              Multicast GID
  * @ret rc             Return status code
+ *
+ * Note that this function handles only the local device's attachment
+ * to the multicast GID; it does not issue the relevant MADs to join
+ * the multicast group on the subnet.
  */
 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                      struct ib_gid *gid ) {
diff --git a/src/net/infiniband/ib_mcast.c b/src/net/infiniband/ib_mcast.c
new file mode 100644 (file)
index 0000000..358ee0d
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/list.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_gma.h>
+#include <gpxe/ib_mcast.h>
+
+/** @file
+ *
+ * Infiniband multicast groups
+ *
+ */
+
+/**
+ * Construct and issue a multicast group membership (join or leave) request
+ *
+ * @v ibdev            Infiniband device
+ * @v gid              Multicast GID
+ * @v join             Join (rather than leave) group
+ * @ret rc             Return status code (0 once the request is issued)
+ */
+static int ib_mc_member_request ( struct ib_device *ibdev, struct ib_gid *gid,
+                                 int join ) {
+       union ib_mad mad;
+       struct ib_mad_sa *sa = &mad.sa;
+       int rc;
+
+       /* Zero the whole MAD (not just its SA view), then fill in request */
+       memset ( &mad, 0, sizeof ( mad ) );
+       sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+       sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+       sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+       sa->mad_hdr.method =
+               ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
+       sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
+       sa->sa_hdr.comp_mask[1] =
+               htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+                       IB_SA_MCMEMBER_REC_JOIN_STATE );
+       sa->sa_data.mc_member_record.scope__join_state = 1;
+       memcpy ( &sa->sa_data.mc_member_record.mgid, gid,
+                sizeof ( sa->sa_data.mc_member_record.mgid ) );
+       memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid,
+                sizeof ( sa->sa_data.mc_member_record.port_gid ) );
+
+       /* Issue request; the final argument is non-zero only when joining */
+       if ( ( rc = ib_gma_request ( &ibdev->gma, &mad, NULL,
+                                    join ) ) != 0 ) {
+               DBGC ( ibdev, "IBDEV %p could not %s group: %s\n", ibdev,
+                      ( join ? "join" : "leave" ), strerror ( rc ) );
+               return rc;
+       }
+
+       return 0;
+}
+
+/**
+ * Join multicast group: attach the queue pair, then issue the join request
+ *
+ * @v ibdev            Infiniband device
+ * @v qp               Queue pair
+ * @v gid              Multicast GID
+ * @ret rc             Return status code (0 once the join request is issued)
+ */
+int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+                   struct ib_gid *gid ) {
+       int rc;
+
+       DBGC ( ibdev, "IBDEV %p QPN %lx joining %08x:%08x:%08x:%08x\n",
+              ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ),
+              ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+              ntohl ( gid->u.dwords[3] ) );
+
+       /* Attach queue pair to multicast GID; nothing to undo on failure */
+       if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) {
+               DBGC ( ibdev, "IBDEV %p could not attach: %s\n",
+                      ibdev, strerror ( rc ) );
+               goto err_mcast_attach;
+       }
+
+       /* Initiate multicast membership join; undo the attach on failure */
+       if ( ( rc = ib_mc_member_request ( ibdev, gid, 1 ) ) != 0 )
+               goto err_mc_member_record;
+
+       return 0;
+
+ err_mc_member_record:
+       ib_mcast_detach ( ibdev, qp, gid );
+ err_mcast_attach:
+       return rc;
+}
+
+/**
+ * Leave multicast group: detach the queue pair, then issue the leave request
+ *
+ * @v ibdev            Infiniband device
+ * @v qp               Queue pair
+ * @v gid              Multicast GID
+ */
+void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+                     struct ib_gid *gid ) {
+
+       DBGC ( ibdev, "IBDEV %p QPN %lx leaving %08x:%08x:%08x:%08x\n",
+              ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ),
+              ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+              ntohl ( gid->u.dwords[3] ) );
+
+       /* Detach queue pair from multicast GID */
+       ib_mcast_detach ( ibdev, qp, gid );
+
+       /* Initiate multicast membership leave (any error is discarded) */
+       ib_mc_member_request ( ibdev, gid, 0 );
+}
+
+/**
+ * Handle multicast membership record join response
+ *
+ * @v ibdev            Infiniband device
+ * @v mad              Response MAD carrying the multicast member record
+ * @ret rc             Return status code
+ */
+static int ib_handle_mc_member_join ( struct ib_device *ibdev,
+                                     union ib_mad *mad ) {
+       struct ib_mc_member_record *mc_member_record =
+               &mad->sa.sa_data.mc_member_record;
+       struct ib_queue_pair *qp;
+       struct ib_gid *gid;
+       unsigned long qkey;
+       int rc;
+
+       /* Fail with -EINVAL unless the MAD status reports success */
+       if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) {
+               DBGC ( ibdev, "IBDEV %p join failed with status %04x\n",
+                      ibdev, ntohs ( mad->hdr.status ) );
+               return -EINVAL;
+       }
+
+       /* Extract multicast GID and queue key (converted to host order) */
+       gid = &mc_member_record->mgid;
+       qkey = ntohl ( mc_member_record->qkey );
+
+       /* Locate queue pair for this multicast GID; -ENOENT if none */
+       qp = ib_find_qp_mgid ( ibdev, gid );
+       if ( ! qp ) {
+               DBGC ( ibdev, "IBDEV %p has no QP to join "
+                      "%08x:%08x:%08x:%08x\n", ibdev,
+                      ntohl ( gid->u.dwords[0] ),
+                      ntohl ( gid->u.dwords[1] ),
+                      ntohl ( gid->u.dwords[2] ),
+                      ntohl ( gid->u.dwords[3] ) );
+               return -ENOENT;
+       }
+       DBGC ( ibdev, "IBDEV %p QPN %lx joined %08x:%08x:%08x:%08x qkey "
+              "%lx\n", ibdev, qp->qpn,
+              ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ),
+              ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ),
+              qkey );
+
+       /* Apply the queue key from the membership record to the queue pair */
+       if ( ( rc = ib_modify_qp ( ibdev, qp, IB_MODIFY_QKEY, qkey ) ) != 0 ) {
+               DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n",
+                      ibdev, qp->qpn, strerror ( rc ) );
+               return rc;
+       }
+
+       return 0;
+}
+
+/**
+ * Handle multicast membership record leave response
+ *
+ * @v ibdev            Infiniband device
+ * @v mad              Response MAD carrying the multicast member record
+ * @ret rc             Return status code
+ */
+static int ib_handle_mc_member_leave ( struct ib_device *ibdev,
+                                      union ib_mad *mad ) {
+       struct ib_mc_member_record *mc_member_record =
+               &mad->sa.sa_data.mc_member_record;
+       struct ib_gid *gid;
+
+       /* Fail with -EINVAL unless the MAD status reports success */
+       if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) {
+               DBGC ( ibdev, "IBDEV %p leave failed with status %04x\n",
+                      ibdev, ntohs ( mad->hdr.status ) );
+               return -EINVAL;
+       }
+
+       /* Extract multicast GID; it is used only for the debug message */
+       gid = &mc_member_record->mgid;
+       DBGC ( ibdev, "IBDEV %p left %08x:%08x:%08x:%08x\n", ibdev,
+              ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ),
+              ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ) );
+
+       return 0;
+}
+
+/** Multicast membership record response handlers (join and leave) */
+struct ib_mad_handler ib_mc_member_record_handlers[] __ib_mad_handler = {
+       {
+               .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+               .class_version = IB_SA_CLASS_VERSION,
+               .method = IB_MGMT_METHOD_GET_RESP,
+               .attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ),
+               .handle = ib_handle_mc_member_join,
+       },
+       {
+               .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+               .class_version = IB_SA_CLASS_VERSION,
+               .method = IB_SA_METHOD_DELETE_RESP,
+               .attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ),
+               .handle = ib_handle_mc_member_leave,
+       },
+};