[infiniband] Add Communication Manager (CM)
author Michael Brown <mcb30@etherboot.org>
Fri, 17 Jul 2009 21:10:42 +0000 (22:10 +0100)
committer Michael Brown <mcb30@etherboot.org>
Fri, 17 Jul 2009 22:06:35 +0000 (23:06 +0100)
The Communication Manager is responsible for handling the setup and
teardown of RC connections.

src/include/gpxe/errfile.h
src/include/gpxe/ib_cm.h [new file with mode: 0644]
src/include/gpxe/ib_mad.h
src/net/infiniband/ib_cm.c [new file with mode: 0644]

index 5e7fa09..d315532 100644 (file)
@@ -147,6 +147,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
 #define ERRFILE_ib_gma                 ( ERRFILE_NET | 0x001b0000 )
 #define ERRFILE_ib_pathrec             ( ERRFILE_NET | 0x001c0000 )
 #define ERRFILE_ib_mcast               ( ERRFILE_NET | 0x001d0000 )
 #define ERRFILE_ib_gma                 ( ERRFILE_NET | 0x001b0000 )
 #define ERRFILE_ib_pathrec             ( ERRFILE_NET | 0x001c0000 )
 #define ERRFILE_ib_mcast               ( ERRFILE_NET | 0x001d0000 )
+#define ERRFILE_ib_cm                  ( ERRFILE_NET | 0x001e0000 )
 
 #define ERRFILE_image                ( ERRFILE_IMAGE | 0x00000000 )
 #define ERRFILE_elf                  ( ERRFILE_IMAGE | 0x00010000 )
 
 #define ERRFILE_image                ( ERRFILE_IMAGE | 0x00000000 )
 #define ERRFILE_elf                  ( ERRFILE_IMAGE | 0x00010000 )
diff --git a/src/include/gpxe/ib_cm.h b/src/include/gpxe/ib_cm.h
new file mode 100644 (file)
index 0000000..a444622
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _GPXE_IB_CM_H
+#define _GPXE_IB_CM_H
+
+/** @file
+ *
+ * Infiniband communication management
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <gpxe/infiniband.h>
+
+/**
+ * Connect to remote QP
+ *
+ * @v qp               Queue pair
+ * @v dgid             Target GID
+ * @v service_id       Target service ID
+ * @v private_data     Private data
+ * @v private_data_len Length of private data
+ * @v notify           Notification handler; called with the queue pair,
+ *                     a connection status code, and the private data
+ *                     (and its length) taken from the peer's message
+ * @ret rc             Return status code
+ */
+extern int ib_cm_connect ( struct ib_queue_pair *qp, struct ib_gid *dgid,
+                          struct ib_gid_half *service_id,
+                          void *private_data, size_t private_data_len,
+                          void ( * notify ) ( struct ib_queue_pair *qp,
+                                              int rc, void *private_data,
+                                              size_t private_data_len ) );
+
+#endif /* _GPXE_IB_CM_H */
index 75bf271..a628cea 100644 (file)
@@ -282,6 +282,172 @@ union ib_sa_data {
        struct ib_mc_member_record mc_member_record;
 } __attribute__ (( packed ));
 
        struct ib_mc_member_record mc_member_record;
 } __attribute__ (( packed ));
 
+/*****************************************************************************
+ *
+ * Communication management MADs
+ *
+ *****************************************************************************
+ */
+
+/** Communication management class version */
+#define IB_CM_CLASS_VERSION                    2
+
+/* Communication management attributes
+ *
+ * Attribute IDs placed (in network byte order) in the MAD header's
+ * attr_id field.  Each ID must be unique: the service ID resolution
+ * request is 0x0017, distinct from the disconnect reply (0x0016).
+ */
+#define IB_CM_ATTR_CLASS_PORT_INFO             0x0001
+#define IB_CM_ATTR_CONNECT_REQUEST             0x0010
+#define IB_CM_ATTR_MSG_RCPT_ACK                        0x0011
+#define IB_CM_ATTR_CONNECT_REJECT              0x0012
+#define IB_CM_ATTR_CONNECT_REPLY               0x0013
+#define IB_CM_ATTR_READY_TO_USE                        0x0014
+#define IB_CM_ATTR_DISCONNECT_REQUEST          0x0015
+#define IB_CM_ATTR_DISCONNECT_REPLY            0x0016
+#define IB_CM_ATTR_SERVICE_ID_RES_REQ          0x0017
+#define IB_CM_ATTR_SERVICE_ID_RES_REQ_RESP     0x0018
+#define IB_CM_ATTR_LOAD_ALTERNATE_PATH         0x0019
+#define IB_CM_ATTR_ALTERNATE_PATH_RESPONSE     0x001a
+
+/** A communication management path
+ *
+ * Describes one path (primary or alternate) within a connection
+ * request.  The structure is packed; ib_cm_send_request() fills the
+ * multi-byte fields in network byte order.
+ */
+struct ib_cm_path {
+       /** Local port LID */
+       uint16_t local_lid;
+       /** Remote port LID */
+       uint16_t remote_lid;
+       /** Local port GID */
+       struct ib_gid local_gid;
+       /** Remote port GID */
+       struct ib_gid remote_gid;
+       /** Flow label and rate */
+       uint32_t flow_label__rate;
+       /** Traffic class */
+       uint8_t tc;
+       /** Hop limit */
+       uint8_t hop_limit;
+       /** SL and subnet local */
+       uint8_t sl__subnet_local;
+       /** Local ACK timeout */
+       uint8_t local_ack_timeout;
+} __attribute__ (( packed ));
+
+/** A communication management connection request
+ *
+ * Defined in section 12.6.5 of the IBA.
+ *
+ * Several fields pack more than one value; their names list the
+ * sub-fields separated by a double underscore.  In the packing used
+ * by ib_cm_send_request(), the first-named sub-field occupies the
+ * most significant bits, and multi-byte fields are stored in network
+ * byte order.
+ */
+struct ib_cm_connect_request {
+       /** Local communication ID */
+       uint32_t local_id;
+       /** Reserved */
+       uint32_t reserved0[1];
+       /** Service ID */
+       struct ib_gid_half service_id;
+       /** Local CA GUID */
+       struct ib_gid_half local_ca;
+       /** Reserved */
+       uint32_t reserved1[1];
+       /** Local queue key */
+       uint32_t local_qkey;
+       /** Local QPN and responder resources */
+       uint32_t local_qpn__responder_resources;
+       /** Local EECN and initiator depth */
+       uint32_t local_eecn__initiator_depth;
+       /** Remote EECN, remote CM response timeout, transport service
+        * type, EE flow control
+        */
+       uint32_t remote_eecn__remote_timeout__service_type__ee_flow_ctrl;
+       /** Starting PSN, local CM response timeout and retry count */
+       uint32_t starting_psn__local_timeout__retry_count;
+       /** Partition key */
+       uint16_t pkey;
+       /** Path packet payload MTU, RDC exists, RNR retry count */
+       uint8_t payload_mtu__rdc_exists__rnr_retry;
+       /** Max CM retries and SRQ */
+       uint8_t max_cm_retries__srq;
+       /** Primary path */
+       struct ib_cm_path primary;
+       /** Alternate path */
+       struct ib_cm_path alternate;
+       /** Private data */
+       uint8_t private_data[92];
+} __attribute__ (( packed ));
+
+/** CM transport types
+ *
+ * Values for the transport service type portion of the connection
+ * request's remote_eecn__remote_timeout__service_type__ee_flow_ctrl
+ * field.
+ */
+#define IB_CM_TRANSPORT_RC             0
+#define IB_CM_TRANSPORT_UC             1
+#define IB_CM_TRANSPORT_RD             2
+
+/** A communication management connection rejection
+ *
+ * Defined in section 12.6.7 of the IBA.
+ *
+ * The IDs are from the sender's point of view: the receiver looks up
+ * its own request using remote_id.
+ */
+struct ib_cm_connect_reject {
+       /** Local communication ID */
+       uint32_t local_id;
+       /** Remote communication ID */
+       uint32_t remote_id;
+       /** Message rejected */
+       uint8_t message;
+       /** Reject information length */
+       uint8_t info_len;
+       /** Rejection reason (network byte order) */
+       uint16_t reason;
+       /** Additional rejection information */
+       uint8_t info[72];
+       /** Private data */
+       uint8_t private_data[148];
+} __attribute__ (( packed ));
+
+/** A communication management connection reply
+ *
+ * Defined in section 12.6.8 of the IBA.
+ *
+ * The IDs are from the sender's point of view: the receiver looks up
+ * its own request using remote_id.
+ */
+struct ib_cm_connect_reply {
+       /** Local communication ID */
+       uint32_t local_id;
+       /** Remote communication ID */
+       uint32_t remote_id;
+       /** Local queue key */
+       uint32_t local_qkey;
+       /** Local QPN (the receiver extracts it by shifting right 8 bits) */
+       uint32_t local_qpn;
+       /** Local EECN */
+       uint32_t local_eecn;
+       /** Starting PSN (the receiver extracts it by shifting right 8 bits) */
+       uint32_t starting_psn;
+       /** Responder resources */
+       uint8_t responder_resources;
+       /** Initiator depth */
+       uint8_t initiator_depth;
+       /** Target ACK delay, failover accepted, and end-to-end flow control */
+       uint8_t target_ack_delay__failover_accepted__ee_flow_ctrl;
+       /** RNR retry count, SRQ */
+       uint8_t rnr_retry__srq;
+       /** Local CA GUID */
+       struct ib_gid_half local_ca;
+       /** Private data */
+       uint8_t private_data[196];
+} __attribute__ (( packed ));
+
+/** A communication management ready to use reply
+ *
+ * Defined in section 12.6.9 of the IBA.
+ *
+ * Built by the connection reply handler as its response once the
+ * queue pair has been modified.
+ */
+struct ib_cm_ready_to_use {
+       /** Local communication ID */
+       uint32_t local_id;
+       /** Remote communication ID */
+       uint32_t remote_id;
+       /** Private data */
+       uint8_t private_data[224];
+} __attribute__ (( packed ));
+
+/** A communication management attribute
+ *
+ * Overlays every supported CM message format on the same storage, so
+ * a received message can be rewritten in place as a different
+ * message type.
+ */
+union ib_cm_data {
+       struct ib_cm_connect_request connect_request;
+       struct ib_cm_connect_reject connect_reject;
+       struct ib_cm_connect_reply connect_reply;
+       struct ib_cm_ready_to_use ready_to_use;
+       /* Raw attribute payload */
+       uint8_t bytes[232];
+} __attribute__ (( packed ));
+
 /*****************************************************************************
  *
  * MADs
 /*****************************************************************************
  *
  * MADs
@@ -362,11 +528,18 @@ struct ib_mad_sa {
        union ib_sa_data sa_data;
 } __attribute__ (( packed ));
 
        union ib_sa_data sa_data;
 } __attribute__ (( packed ));
 
+/** A communication management MAD
+ *
+ * A common MAD header followed by the CM attribute payload.
+ */
+struct ib_mad_cm {
+       struct ib_mad_hdr mad_hdr;
+       union ib_cm_data cm_data;
+} __attribute__ (( packed ));
+
 /** A management datagram */
 union ib_mad {
        struct ib_mad_hdr hdr;
        struct ib_mad_smp smp;
        struct ib_mad_sa sa;
 /** A management datagram */
 union ib_mad {
        struct ib_mad_hdr hdr;
        struct ib_mad_smp smp;
        struct ib_mad_sa sa;
+       struct ib_mad_cm cm;
        uint8_t bytes[256];
 } __attribute__ (( packed ));
 
        uint8_t bytes[256];
 } __attribute__ (( packed ));
 
diff --git a/src/net/infiniband/ib_cm.c b/src/net/infiniband/ib_cm.c
new file mode 100644 (file)
index 0000000..b95ce9f
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <gpxe/list.h>
+#include <gpxe/process.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_gma.h>
+#include <gpxe/ib_pathrec.h>
+#include <gpxe/ib_cm.h>
+
+/**
+ * @file
+ *
+ * Infiniband communication management
+ *
+ */
+
+/** An outstanding connection request
+ *
+ * Allocated by ib_cm_connect() with private_data_len extra bytes
+ * after the structure to hold the trailing private data.
+ */
+struct ib_cm_request {
+       /** List of all outstanding requests */
+       struct list_head list;
+       /** Local communication ID (random value chosen at connect time) */
+       uint32_t local_id;
+       /** Remote communication ID (recorded when a matching message
+        * from the peer is looked up)
+        */
+       uint32_t remote_id;
+       /** Queue pair */
+       struct ib_queue_pair *qp;
+       /** Target service ID */
+       struct ib_gid_half service_id;
+       /** Connection process */
+       struct process process;
+       /** Notification handler
+        *
+        * @v qp                Queue pair
+        * @v rc                Connection status code
+        * @v private_data      Private data
+        * @v private_data_len  Length of private data
+        */
+       void ( * notify ) ( struct ib_queue_pair *qp, int rc,
+                           void *private_data, size_t private_data_len );
+       /** Private data length */
+       size_t private_data_len;
+       /** Private data (variable length; storage follows the structure) */
+       uint8_t private_data[0];
+};
+
+/** List of all outstanding connection requests
+ *
+ * Requests are added by ib_cm_connect() and searched by
+ * ib_cm_find_request().
+ */
+static LIST_HEAD ( ib_cm_requests );
+
+/**
+ * Send connection request
+ *
+ * Builds a connection request MAD on the stack from the request's
+ * queue pair, its Infiniband device and the stored private data, then
+ * passes it to ib_gma_request().
+ *
+ * @v request          Connection request
+ * @ret rc             Return status code (zero on success, otherwise
+ *                     the error returned by ib_gma_request())
+ */
+static int ib_cm_send_request ( struct ib_cm_request *request ) {
+       struct ib_queue_pair *qp = request->qp;
+       struct ib_device *ibdev = qp->ibdev;
+       struct ib_gma *gma = ibdev->gma;
+       union ib_mad mad;
+       struct ib_mad_cm *cm = &mad.cm;
+       struct ib_cm_connect_request *connect_req =
+               &cm->cm_data.connect_request;
+       size_t private_data_len;
+       int rc;
+
+       /* Construct connection request */
+       memset ( cm, 0, sizeof ( *cm ) );
+       cm->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+       cm->mad_hdr.mgmt_class = IB_MGMT_CLASS_CM;
+       cm->mad_hdr.class_version = IB_CM_CLASS_VERSION;
+       cm->mad_hdr.method = IB_MGMT_METHOD_SEND;
+       cm->mad_hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
+       connect_req->local_id = htonl ( request->local_id );
+       memcpy ( &connect_req->service_id, &request->service_id,
+                sizeof ( connect_req->service_id ) );
+       ib_get_hca_info ( ibdev, &connect_req->local_ca );
+       /* QPN sits above the low byte; the low byte (responder
+        * resources) is set to 1
+        */
+       connect_req->local_qpn__responder_resources =
+               htonl ( ( qp->qpn << 8 ) | 1 );
+       /* EECN is zero; the low byte (initiator depth) is set to 1 */
+       connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
+       /* Timeout value 0x14 from bit 3, RC transport type from bit 1,
+        * EE flow control (bit 0) clear
+        */
+       connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
+               htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
+                       ( 0 << 0 ) );
+       /* Receive PSN above the low byte, timeout value 0x14 from
+        * bit 3, retry count 7 in the low three bits
+        */
+       connect_req->starting_psn__local_timeout__retry_count =
+               htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
+                       ( 0x07 << 0 ) );
+       connect_req->pkey = htons ( ibdev->pkey );
+       connect_req->payload_mtu__rdc_exists__rnr_retry =
+               ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
+       connect_req->max_cm_retries__srq =
+               ( ( 0x0f << 4 ) | ( 0 << 3 ) );
+       /* Primary path is taken from the device and the queue pair's
+        * address vector; the alternate path is left zeroed
+        */
+       connect_req->primary.local_lid = htons ( ibdev->lid );
+       connect_req->primary.remote_lid = htons ( request->qp->av.lid );
+       memcpy ( &connect_req->primary.local_gid, &ibdev->gid,
+                sizeof ( connect_req->primary.local_gid ) );
+       memcpy ( &connect_req->primary.remote_gid, &request->qp->av.gid,
+                sizeof ( connect_req->primary.remote_gid ) );
+       connect_req->primary.flow_label__rate =
+               htonl ( ( 0 << 12 ) | ( request->qp->av.rate << 0 ) );
+       connect_req->primary.hop_limit = 0;
+       connect_req->primary.sl__subnet_local =
+               ( ( request->qp->av.sl << 4 ) | ( 1 << 3 ) );
+       connect_req->primary.local_ack_timeout = ( 0x13 << 3 );
+       /* Truncate private data to fit the request's private data field */
+       private_data_len = request->private_data_len;
+       if ( private_data_len > sizeof ( connect_req->private_data ) )
+               private_data_len = sizeof ( connect_req->private_data );
+       memcpy ( &connect_req->private_data, &request->private_data,
+                private_data_len );
+
+       /* Send request */
+       /* NOTE(review): the meaning of the NULL and 1 arguments is
+        * defined by ib_gma_request(), which is not visible here
+        */
+       if ( ( rc = ib_gma_request ( gma, &mad, NULL, 1 ) ) != 0 ) {
+               DBGC ( gma, "GMA %p could not send connection request: %s\n",
+                      gma, strerror ( rc ) );
+               return rc;
+       }
+
+       return 0;
+
+}
+
+/**
+ * Connection request process step
+ *
+ * Attempts path resolution and then transmission of the connection
+ * request.  If either fails the step simply returns, leaving the
+ * process in place; once both succeed the process is removed.
+ *
+ * @v process          Connection request process
+ */
+static void ib_cm_step ( struct process *process ) {
+       struct ib_cm_request *req =
+               container_of ( process, struct ib_cm_request, process );
+
+       /* Nothing can be sent until the path has been resolved */
+       if ( ib_resolve_path ( req->qp->ibdev, &req->qp->av ) != 0 )
+               return;
+
+       /* Leave the process in place if the request could not be sent */
+       if ( ib_cm_send_request ( req ) != 0 )
+               return;
+
+       /* Request is on its way; no further steps are needed */
+       process_del ( process );
+}
+
+/**
+ * Identify connection request by communication ID
+ *
+ * Searches the list of outstanding requests for a matching local
+ * communication ID.  As a side effect, the remote communication ID is
+ * recorded in the request that is found.
+ *
+ * @v local_id         Local communication ID
+ * @v remote_id                Remote communication ID
+ * @ret request                Connection request, or NULL
+ */
+static struct ib_cm_request * ib_cm_find_request ( uint32_t local_id,
+                                                  uint32_t remote_id ) {
+       struct ib_cm_request *candidate;
+
+       list_for_each_entry ( candidate, &ib_cm_requests, list ) {
+               if ( candidate->local_id != local_id )
+                       continue;
+
+               /* Remember which remote ID this request is paired with */
+               candidate->remote_id = remote_id;
+               return candidate;
+       }
+
+       /* No outstanding request uses this local ID */
+       return NULL;
+}
+
+/**
+ * Handle connection reply
+ *
+ * Looks up the outstanding request named by the reply, records the
+ * peer's QPN and starting PSN in the queue pair, modifies the queue
+ * pair, notifies the connection owner, and rewrites the received MAD
+ * in place as a ready to use message.
+ *
+ * @v gma              General management agent
+ * @v mad              MAD
+ * @ret response       MAD response (the rewritten MAD, or NULL if
+ *                     the request is unknown or the queue pair could
+ *                     not be modified)
+ */
+static union ib_mad * ib_cm_connect_reply ( struct ib_gma *gma,
+                                           union ib_mad *mad ) {
+       /* Both pointers refer to the same union storage within the MAD */
+       struct ib_cm_connect_reply *connect_rep =
+               &mad->cm.cm_data.connect_reply;
+       struct ib_cm_ready_to_use *ready =
+               &mad->cm.cm_data.ready_to_use;
+       struct ib_cm_request *request;
+       int rc;
+
+       /* Identify request */
+       /* The sender's remote ID is our local ID, and vice versa */
+       request = ib_cm_find_request ( ntohl ( connect_rep->remote_id ),
+                                      ntohl ( connect_rep->local_id ) );
+       if ( ! request ) {
+               DBGC ( gma, "GMA %p received connection reply with unknown "
+                      "ID %08x\n", gma, ntohl ( connect_rep->remote_id ) );
+               return NULL;
+       }
+
+       /* Extract fields */
+       request->qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 );
+       request->qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 );
+       DBGC ( gma, "GMA %p QPN %lx connected to QPN %lx PSN %x\n", gma,
+              request->qp->qpn, request->qp->av.qpn, request->qp->send.psn );
+
+       /* Modify queue pair */
+       /* NOTE(review): on failure the notification handler is not
+        * called, so the connection owner is not told about the error
+        */
+       if ( ( rc = ib_modify_qp ( request->qp->ibdev, request->qp ) ) != 0 ) {
+               DBGC ( gma, "GMA %p QPN %lx could not modify queue pair: %s\n",
+                      gma, request->qp->qpn, strerror ( rc ) );
+               return NULL;
+       }
+
+       /* Inform recipient that we are now connected */
+       /* Must happen before the reply is overwritten below, since the
+        * private data lives in the storage that is about to be zeroed
+        */
+       request->notify ( request->qp, 0, &connect_rep->private_data,
+                         sizeof ( connect_rep->private_data ) );
+
+       /* Construct ready to use reply */
+       mad->hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
+       memset ( ready, 0, sizeof ( *ready ) );
+       ready->local_id = htonl ( request->local_id );
+       ready->remote_id = htonl ( request->remote_id );
+
+       return mad;
+}
+
+/**
+ * Handle connection rejection
+ *
+ * Looks up the outstanding request named by the rejection and reports
+ * the failure, with the rejection's private data, to the request's
+ * notification handler.
+ *
+ * @v gma              General management agent
+ * @v mad              MAD
+ * @ret response       MAD response (always NULL)
+ */
+static union ib_mad * ib_cm_connect_reject ( struct ib_gma *gma,
+                                            union ib_mad *mad ) {
+       struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
+       struct ib_cm_request *request;
+       uint16_t reason;
+
+       /* The sender's remote ID is our local ID, and vice versa */
+       request = ib_cm_find_request ( ntohl ( rej->remote_id ),
+                                      ntohl ( rej->local_id ) );
+       if ( request == NULL ) {
+               DBGC ( gma, "GMA %p received connection rejection with "
+                      "unknown ID %08x\n", gma,
+                      ntohl ( rej->remote_id ) );
+               return NULL;
+       }
+
+       /* Report the rejection reason */
+       reason = ntohs ( rej->reason );
+       DBGC ( gma, "GMA %p QPN %lx connection rejected (reason %d)\n",
+              gma, request->qp->qpn, reason );
+
+       /* Tell the connection owner that the connection attempt failed */
+       request->notify ( request->qp, -ENOTCONN, &rej->private_data,
+                         sizeof ( rej->private_data ) );
+
+       return NULL;
+}
+
+/** Communication management MAD handlers
+ *
+ * One entry per handled CM attribute: connection replies are handled
+ * by ib_cm_connect_reply() and connection rejections by
+ * ib_cm_connect_reject().  The attribute IDs are stored in network
+ * byte order.
+ *
+ * NOTE(review): the table is registered via __ib_gma_handler; the
+ * code that matches incoming MADs against it is not visible here.
+ */
+struct ib_gma_handler ib_cm_handlers[] __ib_gma_handler = {
+       {
+               .mgmt_class = IB_MGMT_CLASS_CM,
+               .class_version = IB_CM_CLASS_VERSION,
+               .method = IB_MGMT_METHOD_SEND,
+               .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
+               .handle = ib_cm_connect_reply,
+       },
+       {
+               .mgmt_class = IB_MGMT_CLASS_CM,
+               .class_version = IB_CM_CLASS_VERSION,
+               .method = IB_MGMT_METHOD_SEND,
+               .attr_id = htons ( IB_CM_ATTR_CONNECT_REJECT ),
+               .handle = ib_cm_connect_reject,
+       },
+};
+
+/**
+ * Connect to remote QP
+ *
+ * Allocates a connection request with a copy of the private data,
+ * resets the queue pair's address vector to target the given GID,
+ * and initialises the connection process with ib_cm_step() as its
+ * step function.
+ *
+ * @v qp               Queue pair
+ * @v dgid             Target GID
+ * @v service_id       Target service ID
+ * @v private_data     Private data
+ * @v private_data_len Length of private data
+ * @v notify           Notification handler
+ * @ret rc             Return status code (-ENOMEM if the request
+ *                     could not be allocated)
+ */
+int ib_cm_connect ( struct ib_queue_pair *qp, struct ib_gid *dgid,
+                   struct ib_gid_half *service_id,
+                   void *private_data, size_t private_data_len,
+                   void ( * notify ) ( struct ib_queue_pair *qp, int rc,
+                                       void *private_data,
+                                       size_t private_data_len ) ) {
+       struct ib_cm_request *req;
+
+       /* Allocate request with room for the trailing private data */
+       req = zalloc ( sizeof ( *req ) + private_data_len );
+       if ( req == NULL )
+               return -ENOMEM;
+
+       /* Track request and assign its local communication ID */
+       list_add ( &req->list, &ib_cm_requests );
+       req->local_id = random();
+       req->qp = qp;
+
+       /* Reset the address vector so that it names only the target GID */
+       memset ( &qp->av, 0, sizeof ( qp->av ) );
+       qp->av.gid_present = 1;
+       memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
+
+       /* Record connection parameters */
+       memcpy ( &req->service_id, service_id, sizeof ( req->service_id ) );
+       req->notify = notify;
+       req->private_data_len = private_data_len;
+       memcpy ( &req->private_data, private_data, private_data_len );
+
+       /* Initialise connection process */
+       process_init ( &req->process, ib_cm_step, NULL );
+
+       return 0;
+}