[infiniband] Provide a general mechanism for path record lookups
author Michael Brown <mcb30@etherboot.org>
Tue, 7 Jul 2009 01:01:21 +0000 (02:01 +0100)
committer Michael Brown <mcb30@etherboot.org>
Fri, 17 Jul 2009 22:06:33 +0000 (23:06 +0100)
Generalise out the path record lookup code from IPoIB.

src/drivers/net/ipoib.c
src/include/gpxe/errfile.h
src/include/gpxe/ib_pathrec.h [new file with mode: 0644]
src/net/infiniband/ib_pathrec.c [new file with mode: 0644]

diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c
index d6815ec..dd04a43 100644 (file)
@@ -29,6 +29,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
 #include <gpxe/netdevice.h>
 #include <gpxe/infiniband.h>
 #include <gpxe/ib_qset.h>
+#include <gpxe/ib_pathrec.h>
 #include <gpxe/ipoib.h>
 
 /** @file
@@ -78,9 +79,6 @@ struct ipoib_device {
        int broadcast_attached;
 };
 
-/** TID half used to identify get path record replies */
-#define IPOIB_TID_GET_PATH_REC 0x11111111UL
-
 /** TID half used to identify multicast member record replies */
 #define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
 
@@ -118,12 +116,6 @@ struct ipoib_peer {
        uint8_t key;
        /** MAC address */
        struct ipoib_mac mac;
-       /** LID */
-       unsigned int lid;
-       /** Service level */
-       unsigned int sl;
-       /** Rate */
-       unsigned int rate;
 };
 
 /** Number of IPoIB peer cache entries
@@ -352,63 +344,6 @@ struct ll_protocol ipoib_protocol __ll_protocol = {
  ****************************************************************************
  */
 
-/**
- * Transmit path record request
- *
- * @v ipoib            IPoIB device
- * @v gid              Destination GID
- * @ret rc             Return status code
- */
-static int ipoib_get_path_record ( struct ipoib_device *ipoib,
-                                  struct ib_gid *gid ) {
-       struct ib_device *ibdev = ipoib->ibdev;
-       struct io_buffer *iobuf;
-       struct ib_mad_sa *sa;
-       struct ib_address_vector av;
-       int rc;
-
-       /* Allocate I/O buffer */
-       iobuf = alloc_iob ( sizeof ( *sa ) );
-       if ( ! iobuf )
-               return -ENOMEM;
-       iob_put ( iobuf, sizeof ( *sa ) );
-       sa = iobuf->data;
-       memset ( sa, 0, sizeof ( *sa ) );
-
-       /* Construct path record request */
-       sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
-       sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
-       sa->mad_hdr.class_version = 2;
-       sa->mad_hdr.method = IB_MGMT_METHOD_GET;
-       sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
-       sa->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
-       sa->mad_hdr.tid[1] = ipoib_meta_tid++;
-       sa->sa_hdr.comp_mask[1] =
-               htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
-       memcpy ( &sa->sa_data.path_record.dgid, gid,
-                sizeof ( sa->sa_data.path_record.dgid ) );
-       memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid,
-                sizeof ( sa->sa_data.path_record.sgid ) );
-
-       /* Construct address vector */
-       memset ( &av, 0, sizeof ( av ) );
-       av.lid = ibdev->sm_lid;
-       av.sl = ibdev->sm_sl;
-       av.qpn = IB_QPN_GMA;
-       av.qkey = IB_QKEY_GMA;
-
-       /* Post send request */
-       if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
-                                  iobuf ) ) != 0 ) {
-               DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
-                      ipoib, strerror ( rc ) );
-               free_iob ( iobuf );
-               return rc;
-       }
-
-       return 0;
-}
-
 /**
  * Transmit multicast group membership request
  *
@@ -484,7 +419,7 @@ static int ipoib_transmit ( struct net_device *netdev,
        struct ipoib_hdr *ipoib_hdr;
        struct ipoib_peer *dest;
        struct ib_address_vector av;
-       struct ib_gid *gid;
+       int rc;
 
        /* Sanity check */
        if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
@@ -513,21 +448,16 @@ static int ipoib_transmit ( struct net_device *netdev,
                /* Broadcast */
                av.qpn = IB_QPN_BROADCAST;
                av.lid = ipoib->broadcast_lid;
-               gid = &ipoib->broadcast_gid;
+               memcpy ( &av.gid, &ipoib->broadcast_gid, sizeof ( av.gid ) );
        } else {
                /* Unicast */
-               if ( ! dest->lid ) {
-                       /* No LID yet - get path record to fetch LID */
-                       ipoib_get_path_record ( ipoib, &dest->mac.gid );
-                       return -ENOENT;
-               }
                av.qpn = ntohl ( dest->mac.qpn );
-               av.lid = dest->lid;
-               av.rate = dest->rate;
-               av.sl = dest->sl;
-               gid = &dest->mac.gid;
+               memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
+               if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
+                       /* Path not resolved yet */
+                       return rc;
+               }
        }
-       memcpy ( &av.gid, gid, sizeof ( av.gid ) );
 
        return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
 }
@@ -617,33 +547,6 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
        free_iob ( iobuf );
 }
 
-/**
- * Handle received IPoIB path record
- *
- * @v ipoib            IPoIB device
- * @v path_record      Path record
- */
-static void ipoib_recv_path_record ( struct ipoib_device *ipoib,
-                                    struct ib_path_record *path_record ) {
-       struct ipoib_peer *peer;
-
-       /* Locate peer cache entry */
-       peer = ipoib_lookup_peer_by_gid ( &path_record->dgid );
-       if ( ! peer ) {
-               DBGC ( ipoib, "IPoIB %p received unsolicited path record\n",
-                      ipoib );
-               return;
-       }
-
-       /* Update path cache entry */
-       peer->lid = ntohs ( path_record->dlid );
-       peer->sl = ( path_record->reserved__sl & 0x0f );
-       peer->rate = ( path_record->rate_selector__rate & 0x3f );
-
-       DBG ( "IPoIB peer %x has dlid %x sl %x rate %x\n",
-             peer->key, peer->lid, peer->sl, peer->rate );
-}
-
 /**
  * Handle received IPoIB multicast membership record
  *
@@ -710,9 +613,6 @@ ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
        }
 
        switch ( sa->mad_hdr.tid[0] ) {
-       case IPOIB_TID_GET_PATH_REC:
-               ipoib_recv_path_record ( ipoib, &sa->sa_data.path_record );
-               break;
        case IPOIB_TID_MC_MEMBER_REC:
                ipoib_recv_mc_member_record ( ipoib,
                                              &sa->sa_data.mc_member_record );
diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h
index e8132b4..0eac0a8 100644 (file)
@@ -145,6 +145,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
 #define ERRFILE_icmp                   ( ERRFILE_NET | 0x00190000 )
 #define ERRFILE_ib_qset                        ( ERRFILE_NET | 0x001a0000 )
 #define ERRFILE_ib_gma                 ( ERRFILE_NET | 0x001b0000 )
+#define ERRFILE_ib_pathrec             ( ERRFILE_NET | 0x001c0000 )
 
 #define ERRFILE_image                ( ERRFILE_IMAGE | 0x00000000 )
 #define ERRFILE_elf                  ( ERRFILE_IMAGE | 0x00010000 )
diff --git a/src/include/gpxe/ib_pathrec.h b/src/include/gpxe/ib_pathrec.h
new file mode 100644 (file)
index 0000000..4451556
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef _GPXE_IB_PATHREC_H
+#define _GPXE_IB_PATHREC_H
+
+/** @file
+ *
+ * Infiniband path records
+ * (declares ib_resolve_path(), the path record lookup entry point)
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <gpxe/infiniband.h>
+
+extern int ib_resolve_path ( struct ib_device *ibdev,
+                            struct ib_address_vector *av );
+
+#endif /* _GPXE_IB_PATHREC_H */
diff --git a/src/net/infiniband/ib_pathrec.c b/src/net/infiniband/ib_pathrec.c
new file mode 100644 (file)
index 0000000..89622d9
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_gma.h>
+#include <gpxe/ib_pathrec.h>
+
+/** @file
+ *
+ * Infiniband path lookups
+ *
+ */
+
+/** Number of path record cache entries
+ *
+ * Must be a power of two (the cache index is reduced modulo this value).
+ */
+#define IB_NUM_CACHED_PATHS 4
+
+/** A path record cache entry, looked up by the ( sgid, dgid ) pair */
+struct ib_cached_path_record {
+       /** Infiniband device's port GID
+        *
+        * Used to disambiguate cache entries when we have multiple
+        * Infiniband devices, without having to maintain a pointer to
+        * the Infiniband device.
+        */
+       struct ib_gid sgid;
+       /** Destination GID */
+       struct ib_gid dgid;
+       /** Destination LID (zero while the lookup is still in progress) */
+       unsigned int dlid;
+       /** Rate (low six bits of the path record's rate field) */
+       unsigned int rate;
+       /** Service level (low four bits of the path record's SL field) */
+       unsigned int sl;
+};
+
+/** Path record cache (slots are recycled in round-robin order) */
+static struct ib_cached_path_record ib_path_cache[IB_NUM_CACHED_PATHS];
+
+/** Oldest path record cache entry index (free-running, reduced on use) */
+static unsigned int ib_path_cache_idx;
+
+/**
+ * Find path record cache entry for this device's port GID and a destination
+ *
+ * @v ibdev            Infiniband device (its port GID is the source key)
+ * @v dgid             Destination GID
+ * @ret cached         Cache entry (resolved or still pending), or NULL
+ */
+static struct ib_cached_path_record *
+ib_find_path_cache_entry ( struct ib_device *ibdev, struct ib_gid *dgid ) {
+       struct ib_cached_path_record *cached;
+       unsigned int i;
+
+       for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) {
+               cached = &ib_path_cache[i];
+               if ( memcmp ( &cached->sgid, &ibdev->gid,
+                             sizeof ( cached->sgid ) ) != 0 )
+                       continue; /* Entry belongs to a different port GID */
+               if ( memcmp ( &cached->dgid, dgid,
+                             sizeof ( cached->dgid ) ) != 0 )
+                       continue; /* Entry is for a different destination */
+               return cached;
+       }
+
+       return NULL;
+}
+
+/**
+ * Resolve path record, filling in LID, rate and SL from the path cache
+ *
+ * @v ibdev            Infiniband device
+ * @v av               Address vector to complete (GID must be present)
+ * @ret rc             0 on cache hit, -ENOENT if lookup issued, else error
+ */
+int ib_resolve_path ( struct ib_device *ibdev,
+                     struct ib_address_vector *av ) {
+       struct ib_gid *gid = &av->gid;
+       struct ib_cached_path_record *cached;
+       union ib_mad mad;
+       struct ib_mad_sa *sa = &mad.sa;
+       unsigned int cache_idx;
+       int rc;
+
+       /* Sanity check: the destination GID is the lookup key */
+       if ( ! av->gid_present ) {
+               DBGC ( ibdev, "IBDEV %p attempt to look up path record "
+                      "without GID\n", ibdev );
+               return -EINVAL;
+       }
+
+       /* Look in cache for a matching entry */
+       cached = ib_find_path_cache_entry ( ibdev, gid );
+       if ( cached && cached->dlid ) {
+               /* Populated entry (nonzero DLID): complete address vector */
+               av->lid = cached->dlid;
+               av->rate = cached->rate;
+               av->sl = cached->sl;
+               DBGC2 ( ibdev, "IBDEV %p cache hit for %08x:%08x:%08x:%08x\n",
+                       ibdev, htonl ( gid->u.dwords[0] ),
+                       htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ),
+                       htonl ( gid->u.dwords[3] ) );
+               return 0;
+       }
+       DBGC ( ibdev, "IBDEV %p cache miss for %08x:%08x:%08x:%08x%s\n", ibdev,
+              htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ),
+              htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ),
+              ( cached ? " (in progress)" : "" ) );
+
+       /* No entry at all (not even a pending one): recycle the oldest slot */
+       if ( ! cached ) {
+               cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS );
+               cached = &ib_path_cache[cache_idx];
+               memset ( cached, 0, sizeof ( *cached ) );
+               memcpy ( &cached->sgid, &ibdev->gid, sizeof ( cached->sgid ) );
+               memcpy ( &cached->dgid, gid, sizeof ( cached->dgid ) );
+       }
+
+       /* Construct path record request (re-sent on each call while pending) */
+       memset ( sa, 0, sizeof ( *sa ) );
+       sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+       sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+       sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+       sa->mad_hdr.method = IB_MGMT_METHOD_GET;
+       sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
+       sa->sa_hdr.comp_mask[1] =
+               htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
+       memcpy ( &sa->sa_data.path_record.dgid, &cached->dgid,
+                sizeof ( sa->sa_data.path_record.dgid ) );
+       memcpy ( &sa->sa_data.path_record.sgid, &cached->sgid,
+                sizeof ( sa->sa_data.path_record.sgid ) );
+
+       /* Issue path record request */
+       if ( ( rc = ib_gma_request ( &ibdev->gma, &mad, NULL ) ) != 0 ) {
+               DBGC ( ibdev, "IBDEV %p could not get path record: %s\n",
+                      ibdev, strerror ( rc ) );
+               return rc;
+       }
+
+       /* Lookup issued; a later call succeeds once the cache is filled */
+       return -ENOENT;
+}
+
+/**
+ * Handle path record response by filling in the matching cache entry
+ *
+ * @v ibdev            Infiniband device
+ * @v mad              MAD
+ * @ret rc             0 (even if no entry matched), or -EINVAL on bad status
+ */
+static int ib_handle_path_record ( struct ib_device *ibdev,
+                                  union ib_mad *mad ) {
+       struct ib_path_record *path_record = &mad->sa.sa_data.path_record;
+       struct ib_gid *dgid = &path_record->dgid;
+       struct ib_cached_path_record *cached;
+       unsigned int dlid;
+       unsigned int sl;
+       unsigned int rate;
+
+       /* Reject the response if its status is not IB_MGMT_STATUS_OK */
+       if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) {
+               DBGC ( ibdev, "IBDEV %p path record lookup failed with status "
+                      "%04x\n", ibdev, ntohs ( mad->hdr.status ) );
+               return -EINVAL;
+       }
+
+       /* Extract values from MAD (DLID byte-swapped; SL and rate masked) */
+       dlid = ntohs ( path_record->dlid );
+       sl = ( path_record->reserved__sl & 0x0f );
+       rate = ( path_record->rate_selector__rate & 0x3f );
+       DBGC ( ibdev, "IBDEV %p path to %08x:%08x:%08x:%08x is %04x sl %d "
+              "rate %d\n", ibdev, htonl ( dgid->u.dwords[0] ),
+              htonl ( dgid->u.dwords[1] ), htonl ( dgid->u.dwords[2] ),
+              htonl ( dgid->u.dwords[3] ), dlid, sl, rate );
+
+       /* Fill in the matching cache entry; drop the response if none exists */
+       if ( ( cached = ib_find_path_cache_entry ( ibdev, dgid ) ) != NULL ) {
+               DBGC ( ibdev, "IBDEV %p cache add for %08x:%08x:%08x:%08x\n",
+                      ibdev, htonl ( dgid->u.dwords[0] ),
+                      htonl ( dgid->u.dwords[1] ),
+                      htonl ( dgid->u.dwords[2] ),
+                      htonl ( dgid->u.dwords[3] ) );
+               cached->dlid = dlid;
+               cached->rate = rate;
+               cached->sl = sl;
+       }
+
+       return 0;
+}
+
+/** Path record response handler (SubnAdm GET_RESP, path record attribute) */
+struct ib_mad_handler ib_path_record_handler __ib_mad_handler = {
+       .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+       .class_version = IB_SA_CLASS_VERSION,
+       .method = IB_MGMT_METHOD_GET_RESP,
+       .attr_id = htons ( IB_SA_ATTR_PATH_REC ),
+       .handle = ib_handle_path_record,
+};