-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id$
- */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif /* HAVE_CONFIG_H */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <netdb.h>
-#include <malloc.h>
-#include <getopt.h>
-#include <arpa/inet.h>
-#include <byteswap.h>
-#include <time.h>
-
-#include <infiniband/verbs.h>
-#include <rdma/rdma_cma.h>
-
-#include "get_clock.h"
-
-#define PINGPONG_RDMA_WRID 3
-#define MAX_INLINE 400
-
-static int inline_size = MAX_INLINE;
-static int page_size;
-static pid_t pid;
-
-struct report_options {
- int unsorted;
- int histogram;
- int cycles; /* report delta's in cycles, not microsec's */
-};
-
-
-struct pingpong_context {
- struct ibv_context *context;
- struct ibv_pd *pd;
- struct ibv_mr *mr;
- struct ibv_cq *rcq;
- struct ibv_cq *scq;
- struct ibv_qp *qp;
- void *buf;
- volatile char *post_buf;
- volatile char *poll_buf;
- int size;
- int tx_depth;
- struct ibv_sge list;
- struct ibv_send_wr wr;
-};
-
-struct pingpong_dest {
- int lid;
- int qpn;
- int psn;
- unsigned rkey;
- unsigned long long vaddr;
-};
-
-struct pp_data {
- int port;
- int ib_port;
- unsigned size;
- int tx_depth;
- int use_cma;
- int sockfd;
- char *servername;
- struct pingpong_dest my_dest;
- struct pingpong_dest *rem_dest;
- struct ibv_device *ib_dev;
- struct rdma_event_channel *cm_channel;
- struct rdma_cm_id *cm_id;
-
-};
-
-static void pp_post_recv(struct pingpong_context *);
-static void pp_wait_for_done(struct pingpong_context *);
-static void pp_send_done(struct pingpong_context *);
-static void pp_wait_for_start(struct pingpong_context *);
-static void pp_send_start(struct pingpong_context *);
-static void pp_close_cma(struct pp_data );
-static struct pingpong_context *pp_init_ctx(void *, struct pp_data *);
-
-
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)
-{
- struct ibv_port_attr attr;
-
- if (ibv_query_port(ctx->context, port, &attr))
- return 0;
-
- return attr.lid;
-}
-
-static struct ibv_device *pp_find_dev(const char *ib_devname)
-{
- struct ibv_device **dev_list;
- struct ibv_device *ib_dev = NULL;
-
- dev_list = ibv_get_device_list(NULL);
-
- if (!ib_devname) {
- ib_dev = dev_list[0];
- if (!ib_dev)
- fprintf(stderr, "No IB devices found\n");
- } else {
- for (; (ib_dev = *dev_list); ++dev_list) {
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
- break;
- }
- if (!ib_dev)
- fprintf(stderr, "IB device %s not found\n", ib_devname);
- }
- return ib_dev;
-}
-
-#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
-#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
-
-static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
-{
- char msg[KEY_MSG_SIZE];
-
- sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
- my_dest->psn, my_dest->rkey, my_dest->vaddr);
-
- if (write(sockfd, msg, sizeof msg) != sizeof msg) {
- perror("client write");
- fprintf(stderr, "Couldn't send local address\n");
- return -1;
- }
-
- return 0;
-}
-
-static int pp_read_keys(int sockfd, const struct pingpong_dest *my_dest,
- struct pingpong_dest *rem_dest)
-{
- int parsed;
- char msg[KEY_MSG_SIZE];
-
- if (read(sockfd, msg, sizeof msg) != sizeof msg) {
- perror("pp_read_keys");
- fprintf(stderr, "Couldn't read remote address\n");
- return -1;
- }
-
- parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
-
- if (parsed != 5) {
- fprintf(stderr, "Couldn't parse line <%.*s>\n",
- (int)sizeof msg, msg);
- return -1;
- }
-
- return 0;
-}
-
-static struct pingpong_context *pp_client_connect(struct pp_data *data)
-{
- struct addrinfo *res, *t;
- struct addrinfo hints = {
- .ai_family = AF_UNSPEC,
- .ai_socktype = SOCK_STREAM
- };
- char *service;
- int n;
- int sockfd = -1;
- struct rdma_cm_event *event;
- struct sockaddr_in sin;
- struct pingpong_context *ctx = NULL;
- struct rdma_conn_param conn_param;
-
- if (asprintf(&service, "%d", data->port) < 0)
- goto err4;
-
- n = getaddrinfo(data->servername, service, &hints, &res);
-
- if (n < 0) {
- fprintf(stderr, "%d:%s: %s for %s:%d\n",
- pid, __func__, gai_strerror(n),
- data->servername, data->port);
- goto err4;
- }
-
- if (data->use_cma) {
- sin.sin_addr.s_addr = ((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr;
- sin.sin_family = AF_INET;
- sin.sin_port = htons(data->port);
- if (rdma_resolve_addr(data->cm_id, NULL,
- (struct sockaddr *)&sin, 2000)) {
- fprintf(stderr, "%d:%s: rdma_resolve_addr failed\n",
- pid, __func__ );
- goto err2;
- }
-
- if (rdma_get_cm_event(data->cm_channel, &event))
- goto err2;
-
- if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
- fprintf(stderr, "%d:%s: unexpected CM event %d\n",
- pid, __func__, event->event);
- goto err1;
- }
- rdma_ack_cm_event(event);
-
- if (rdma_resolve_route(data->cm_id, 2000)) {
- fprintf(stderr, "%d:%s: rdma_resolve_route failed\n",
- pid, __func__);
- goto err2;
- }
-
- if (rdma_get_cm_event(data->cm_channel, &event))
- goto err2;
-
- if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
- fprintf(stderr, "%d:%s: unexpected CM event %d\n",
- pid, __func__, event->event);
- rdma_ack_cm_event(event);
- goto err1;
- }
- rdma_ack_cm_event(event);
- ctx = pp_init_ctx(data->cm_id, data);
- if (!ctx) {
- fprintf(stderr, "%d:%s: pp_init_ctx failed\n", pid, __func__);
- goto err2;
- }
- data->my_dest.psn = lrand48() & 0xffffff;
- data->my_dest.qpn = 0;
- data->my_dest.rkey = ctx->mr->rkey;
- data->my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
-
- memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = 1;
- conn_param.initiator_depth = 1;
- conn_param.retry_count = 5;
- conn_param.private_data = &data->my_dest;
- conn_param.private_data_len = sizeof(data->my_dest);
-
- if (rdma_connect(data->cm_id, &conn_param)) {
- fprintf(stderr, "%d:%s: rdma_connect failure\n", pid, __func__);
- goto err2;
- }
-
- if (rdma_get_cm_event(data->cm_channel, &event))
- goto err2;
-
- if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
- fprintf(stderr, "%d:%s: unexpected CM event %d\n",
- pid, __func__, event->event);
- goto err1;
- }
- if (!event->param.conn.private_data ||
- (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {
- fprintf(stderr, "%d:%s: bad private data ptr %p len %d\n",
- pid, __func__, event->param.conn.private_data,
- event->param.conn.private_data_len);
- goto err1;
- }
- data->rem_dest = malloc(sizeof *data->rem_dest);
- if (!data->rem_dest)
- goto err1;
-
- memcpy(data->rem_dest, event->param.conn.private_data, sizeof(*data->rem_dest));
- rdma_ack_cm_event(event);
- } else {
- for (t = res; t; t = t->ai_next) {
- sockfd = socket(t->ai_family, t->ai_socktype,
- t->ai_protocol);
- if (sockfd >= 0) {
- if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
- break;
- close(sockfd);
- sockfd = -1;
- }
- }
- if (sockfd < 0) {
- fprintf(stderr, "%d:%s: Couldn't connect to %s:%d\n",
- pid, __func__, data->servername, data->port);
- goto err3;
- }
- ctx = pp_init_ctx(data->ib_dev, data);
- if (!ctx)
- goto err3;
- data->sockfd = sockfd;
- }
-
- freeaddrinfo(res);
- return ctx;
-
-err1:
- rdma_ack_cm_event(event);
-err2:
- rdma_destroy_id(data->cm_id);
- rdma_destroy_event_channel(data->cm_channel);
-err3:
- freeaddrinfo(res);
-err4:
- return NULL;
-
-}
-
-
-static int pp_client_exch_dest(struct pp_data *data)
-{
- if (data->rem_dest != NULL)
- free(data->rem_dest);
-
- data->rem_dest = malloc(sizeof *data->rem_dest);
- if (!data->rem_dest)
- return -1;
-
- if (pp_write_keys(data->sockfd, &data->my_dest))
- return -1;
-
- return pp_read_keys(data->sockfd, &data->my_dest, data->rem_dest);
-}
-
-static struct pingpong_context *pp_server_connect(struct pp_data *data)
-{
- struct addrinfo *res, *t;
- struct addrinfo hints = {
- .ai_flags = AI_PASSIVE,
- .ai_family = AF_UNSPEC,
- .ai_socktype = SOCK_STREAM
- };
- char *service;
- int sockfd = -1, connfd;
- int n;
- struct rdma_cm_event *event;
- struct sockaddr_in sin;
- struct pingpong_context *ctx = NULL;
- struct rdma_cm_id *child_cm_id;
- struct rdma_conn_param conn_param;
-
- if (asprintf(&service, "%d", data->port) < 0)
- goto err5;
-
- if ( (n = getaddrinfo(NULL, service, &hints, &res)) < 0 ) {
- fprintf(stderr, "%d:%s: %s for port %d\n", pid, __func__,
- gai_strerror(n), data->port);
- goto err5;
- }
-
- if (data->use_cma) {
- sin.sin_addr.s_addr = 0;
- sin.sin_family = AF_INET;
- sin.sin_port = htons(data->port);
- if (rdma_bind_addr(data->cm_id, (struct sockaddr *)&sin)) {
- fprintf(stderr, "%d:%s: rdma_bind_addr failed\n", pid, __func__);
- goto err3;
- }
-
- if (rdma_listen(data->cm_id, 0)) {
- fprintf(stderr, "%d:%s: rdma_listen failed\n", pid, __func__);
- goto err3;
- }
-
- if (rdma_get_cm_event(data->cm_channel, &event))
- goto err3;
-
- if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
- fprintf(stderr, "%d:%s: bad event waiting for connect request %d\n",
- pid, __func__, event->event);
- goto err2;
- }
-
- if (!event->param.conn.private_data ||
- (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {
- fprintf(stderr, "%d:%s: bad private data len %d\n", pid,
- __func__, event->param.conn.private_data_len);
- goto err2;
- }
-
- data->rem_dest = malloc(sizeof *data->rem_dest);
- if (!data->rem_dest)
- goto err2;
-
- memcpy(data->rem_dest, event->param.conn.private_data, sizeof(*data->rem_dest));
-
- child_cm_id = (struct rdma_cm_id *)event->id;
- ctx = pp_init_ctx(child_cm_id, data);
- if (!ctx) {
- free(data->rem_dest);
- goto err1;
- }
- data->my_dest.psn = lrand48() & 0xffffff;
- data->my_dest.qpn = 0;
- data->my_dest.rkey = ctx->mr->rkey;
- data->my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
-
- memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = 1;
- conn_param.initiator_depth = 1;
- conn_param.private_data = &data->my_dest;
- conn_param.private_data_len = sizeof(data->my_dest);
- if (rdma_accept(child_cm_id, &conn_param)) {
- fprintf(stderr, "%d:%s: rdma_accept failed\n", pid, __func__);
- goto err1;
- }
- rdma_ack_cm_event(event);
- if (rdma_get_cm_event(data->cm_channel, &event)) {
- fprintf(stderr, "%d:%s: rdma_get_cm_event error\n", pid, __func__);
- rdma_destroy_id(child_cm_id);
- goto err3;
- }
- if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
- fprintf(stderr, "%d:%s: bad event waiting for established %d\n",
- pid, __func__, event->event);
- goto err1;
- }
- rdma_ack_cm_event(event);
- } else {
- for (t = res; t; t = t->ai_next) {
- sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
- if (sockfd >= 0) {
- n = 1;
-
- setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
-
- if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
- break;
- close(sockfd);
- sockfd = -1;
- }
- }
-
- if (sockfd < 0) {
- fprintf(stderr, "%d:%s: Couldn't listen to port %d\n", pid,
- __func__, data->port);
- goto err4;
- }
-
- listen(sockfd, 1);
- connfd = accept(sockfd, NULL, 0);
- if (connfd < 0) {
- perror("server accept");
- fprintf(stderr, "%d:%s: accept() failed\n", pid, __func__);
- close(sockfd);
- goto err4;
- }
-
- close(sockfd);
-
- ctx = pp_init_ctx(data->ib_dev, data);
- if (!ctx)
- goto err4;
- data->sockfd = connfd;
- }
- freeaddrinfo(res);
- return ctx;
-
-err1:
- rdma_destroy_id(child_cm_id);
-err2:
- rdma_ack_cm_event(event);
-err3:
- rdma_destroy_id(data->cm_id);
- rdma_destroy_event_channel(data->cm_channel);
-err4:
- freeaddrinfo(res);
-err5:
- return NULL;
-
-}
-
-static int pp_server_exch_dest(struct pp_data *data)
-{
- if (data->rem_dest != NULL)
- free(data->rem_dest);
- data->rem_dest = malloc(sizeof *data->rem_dest);
-
- if (!data->rem_dest)
- return -1;
-
- if (pp_read_keys(data->sockfd, &data->my_dest, data->rem_dest))
- return -1;
-
- return pp_write_keys(data->sockfd, &data->my_dest);
-}
-
-static struct pingpong_context *pp_init_ctx(void *ptr, struct pp_data *data)
-{
- struct pingpong_context *ctx;
- struct ibv_device *ib_dev;
- struct rdma_cm_id *cm_id;
-
- ctx = malloc(sizeof *ctx);
- if (!ctx)
- return NULL;
-
- ctx->size = data->size;
- ctx->tx_depth = data->tx_depth;
-
- ctx->buf = memalign(page_size, ctx->size * 2);
- if (!ctx->buf) {
- fprintf(stderr, "%d:%s: Couldn't allocate work buf.\n",
- pid, __func__);
- return NULL;
- }
-
- memset(ctx->buf, 0, ctx->size * 2);
-
- ctx->post_buf = (char *)ctx->buf + (ctx->size -1);
- ctx->poll_buf = (char *)ctx->buf + (2 * ctx->size -1);
-
-
- if (data->use_cma) {
- cm_id = (struct rdma_cm_id *)ptr;
- ctx->context = cm_id->verbs;
- if (!ctx->context) {
- fprintf(stderr, "%d:%s: Unbound cm_id!!\n", pid,
- __func__);
- return NULL;
- }
-
- } else {
- ib_dev = (struct ibv_device *)ptr;
- ctx->context = ibv_open_device(ib_dev);
- if (!ctx->context) {
- fprintf(stderr, "%d:%s: Couldn't get context for %s\n",
- pid, __func__, ibv_get_device_name(ib_dev));
- return NULL;
- }
- }
-
- ctx->pd = ibv_alloc_pd(ctx->context);
- if (!ctx->pd) {
- fprintf(stderr, "%d:%s: Couldn't allocate PD\n", pid, __func__);
- return NULL;
- }
-
- /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
- * The Consumer is not allowed to assign Remote Write or Remote Atomic to
- * a Memory Region that has not been assigned Local Write. */
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, ctx->size * 2,
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
- if (!ctx->mr) {
- fprintf(stderr, "%d:%s: Couldn't allocate MR\n", pid, __func__);
- return NULL;
- }
-
- ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
- if (!ctx->rcq) {
- fprintf(stderr, "%d:%s: Couldn't create recv CQ\n", pid,
- __func__);
- return NULL;
- }
-
- ctx->scq = ibv_create_cq(ctx->context, ctx->tx_depth, ctx, NULL, 0);
- if (!ctx->scq) {
- fprintf(stderr, "%d:%s: Couldn't create send CQ\n", pid,
- __func__);
- return NULL;
- }
-
-
- struct ibv_qp_init_attr attr = {
- .send_cq = ctx->scq,
- .recv_cq = ctx->rcq,
- .cap = {
- .max_send_wr = ctx->tx_depth,
- /* Work around: driver doesnt support
- * recv_wr = 0 */
- .max_recv_wr = 1,
- .max_send_sge = 1,
- .max_recv_sge = 1,
- .max_inline_data = inline_size,
- },
- .qp_type = IBV_QPT_RC
- };
-
- if (data->use_cma) {
- if (rdma_create_qp(cm_id, ctx->pd, &attr)) {
- fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
- return NULL;
- }
- ctx->qp = cm_id->qp;
- pp_post_recv(ctx);
- } else {
- ctx->qp = ibv_create_qp(ctx->pd, &attr);
- if (!ctx->qp) {
- fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
- return NULL;
- }
- {
- struct ibv_qp_attr attr;
-
- attr.qp_state = IBV_QPS_INIT;
- attr.pkey_index = 0;
- attr.port_num = data->ib_port;
- attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;
-
- if (ibv_modify_qp(ctx->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS)) {
- fprintf(stderr, "%d:%s: Failed to modify QP to INIT\n",
- pid, __func__);
- return NULL;
- }
- }
- }
-
- return ctx;
-}
-
-static int pp_connect_ctx(struct pingpong_context *ctx, struct pp_data *data)
-{
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_RTR,
- .path_mtu = IBV_MTU_256,
- .dest_qp_num = data->rem_dest->qpn,
- .rq_psn = data->rem_dest->psn,
- .max_dest_rd_atomic = 1,
- .min_rnr_timer = 12,
- .ah_attr.is_global = 0,
- .ah_attr.dlid = data->rem_dest->lid,
- .ah_attr.sl = 0,
- .ah_attr.src_path_bits = 0,
- .ah_attr.port_num = data->ib_port
- };
-
- if (ibv_modify_qp(ctx->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC |
- IBV_QP_MIN_RNR_TIMER)) {
- fprintf(stderr, "%s: Failed to modify QP to RTR\n", __func__);
- return 1;
- }
-
- attr.qp_state = IBV_QPS_RTS;
- attr.timeout = 14;
- attr.retry_cnt = 7;
- attr.rnr_retry = 7;
- attr.sq_psn = data->my_dest.psn;
- attr.max_rd_atomic = 1;
- if (ibv_modify_qp(ctx->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC)) {
- fprintf(stderr, "%s: Failed to modify QP to RTS\n", __func__);
- return 1;
- }
-
- return 0;
-}
-
-static int pp_open_port(struct pingpong_context *ctx, struct pp_data *data )
-{
- char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
-
- /* Create connection between client and server.
- * We do it by exchanging data over a TCP socket connection. */
-
- data->my_dest.lid = pp_get_local_lid(ctx, data->ib_port);
- data->my_dest.qpn = ctx->qp->qp_num;
- data->my_dest.psn = lrand48() & 0xffffff;
- if (!data->my_dest.lid) {
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
- return -1;
- }
- data->my_dest.rkey = ctx->mr->rkey;
- data->my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
-
- printf(addr_fmt, "local", data->my_dest.lid, data->my_dest.qpn, data->my_dest.psn,
- data->my_dest.rkey, data->my_dest.vaddr);
-
- if (data->servername) {
- if (pp_client_exch_dest(data))
- return 1;
- } else {
- if (pp_server_exch_dest(data))
- return 1;
- }
-
- printf(addr_fmt, "remote", data->rem_dest->lid, data->rem_dest->qpn,
- data->rem_dest->psn, data->rem_dest->rkey,
- data->rem_dest->vaddr);
-
- if (pp_connect_ctx(ctx, data))
- return 1;
-
- /* An additional handshake is required *after* moving qp to RTR.
- Arbitrarily reuse exch_dest for this purpose. */
- if (data->servername) {
- if (pp_client_exch_dest(data))
- return -1;
- } else {
- if (pp_server_exch_dest(data))
- return -1;
- }
-
- if (write(data->sockfd, "done", sizeof "done") != sizeof "done"){
- perror("write");
- fprintf(stderr, "Couldn't write to socket\n");
- return 1;
- }
-
- close(data->sockfd);
-
- return 0;
-}
-
-static void pp_post_recv(struct pingpong_context *ctx)
-{
- struct ibv_sge list;
- struct ibv_recv_wr wr, *bad_wr;
- int rc;
-
- list.addr = (uintptr_t) ctx->buf;
- list.length = 1;
- list.lkey = ctx->mr->lkey;
- wr.next = NULL;
- wr.wr_id = 0xdeadbeef;
- wr.sg_list = &list;
- wr.num_sge = 1;
-
- rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
- if (rc) {
- perror("ibv_post_recv");
- fprintf(stderr, "%d:%s: ibv_post_recv failed %d\n", pid,
- __func__, rc);
- }
-}
-
-static void pp_wait_for_done(struct pingpong_context *ctx)
-{
- struct ibv_wc wc;
- int ne;
-
- do {
- usleep(500);
- ne = ibv_poll_cq(ctx->rcq, 1, &wc);
- } while (ne == 0);
-
- if (wc.status)
- fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
- wc.status);
- if (!(wc.opcode & IBV_WC_RECV))
- fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
- wc.opcode);
- if (wc.wr_id != 0xdeadbeef)
- fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
- (int)wc.wr_id);
-}
-
-static void pp_send_done(struct pingpong_context *ctx)
-{
- struct ibv_send_wr *bad_wr;
- struct ibv_wc wc;
- int ne;
-
- ctx->list.addr = (uintptr_t) ctx->buf;
- ctx->list.length = 1;
- ctx->list.lkey = ctx->mr->lkey;
- ctx->wr.wr_id = 0xcafebabe;
- ctx->wr.sg_list = &ctx->list;
- ctx->wr.num_sge = 1;
- ctx->wr.opcode = IBV_WR_SEND;
- ctx->wr.send_flags = IBV_SEND_SIGNALED;
- ctx->wr.next = NULL;
- if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
- fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
- return;
- }
- do {
- usleep(500);
- ne = ibv_poll_cq(ctx->scq, 1, &wc);
- } while (ne == 0);
-
- if (wc.status)
- fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
- wc.status);
- if (wc.opcode != IBV_WC_SEND)
- fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
- wc.opcode);
- if (wc.wr_id != 0xcafebabe)
- fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
- (int)wc.wr_id);
-}
-
-static void pp_wait_for_start(struct pingpong_context *ctx)
-{
- struct ibv_wc wc;
- int ne;
-
- do {
- usleep(500);
- ne = ibv_poll_cq(ctx->rcq, 1, &wc);
- } while (ne == 0);
-
- if (wc.status)
- fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
- wc.status);
- if (!(wc.opcode & IBV_WC_RECV))
- fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
- wc.opcode);
- if (wc.wr_id != 0xdeadbeef)
- fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
- (int)wc.wr_id);
- pp_post_recv(ctx);
-}
-
-static void pp_send_start(struct pingpong_context *ctx)
-{
- struct ibv_send_wr *bad_wr;
- struct ibv_wc wc;
- int ne;
-
- ctx->list.addr = (uintptr_t) ctx->buf;
- ctx->list.length = 1;
- ctx->list.lkey = ctx->mr->lkey;
- ctx->wr.wr_id = 0xabbaabba;
- ctx->wr.sg_list = &ctx->list;
- ctx->wr.num_sge = 1;
- ctx->wr.opcode = IBV_WR_SEND;
- ctx->wr.send_flags = IBV_SEND_SIGNALED;
- ctx->wr.next = NULL;
- if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
- fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
- return;
- }
- do {
- usleep(500);
- ne = ibv_poll_cq(ctx->scq, 1, &wc);
- } while (ne == 0);
-
- if (wc.status)
- fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
- wc.status);
- if (wc.opcode != IBV_WC_SEND)
- fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
- wc.opcode);
- if (wc.wr_id != 0xabbaabba)
- fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
- (int)wc.wr_id);
-}
-
-static void pp_close_cma(struct pp_data data)
-{
- struct rdma_cm_event *event;
- int rc;
-
- if (data.servername) {
- rc = rdma_disconnect(data.cm_id);
- if (rc) {
- perror("rdma_disconnect");
- fprintf(stderr, "%d:%s: rdma disconnect error\n", pid,
- __func__);
- return;
- }
- }
-
- rdma_get_cm_event(data.cm_channel, &event);
- if (event->event != RDMA_CM_EVENT_DISCONNECTED)
- fprintf(stderr, "%d:%s: unexpected event during disconnect %d\n",
- pid, __func__, event->event);
- rdma_ack_cm_event(event);
- rdma_destroy_id(data.cm_id);
- rdma_destroy_event_channel(data.cm_channel);
-}
-
-static void usage(const char *argv0)
-{
- printf("Usage:\n");
- printf(" %s start a server and wait for connection\n", argv0);
- printf(" %s <host> connect to server at <host>\n", argv0);
- printf("\n");
- printf("Options:\n");
- printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
- printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
- printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
- printf(" -s, --size=<size> size of message to exchange (default 1)\n");
- printf(" -t, --tx-depth=<dep> size of tx queue (default 50)\n");
- printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
- printf(" -I, --inline_size=<size> max size of message to be sent in inline mode (default 400)\n");
- printf(" -C, --report-cycles report times in cpu cycle units (default microseconds)\n");
- printf(" -H, --report-histogram print out all results (default print summary only)\n");
- printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
- printf(" -c, --cma Use the RDMA CMA to setup the RDMA connection\n");
-}
-
-/*
- * When there is an
- * odd number of samples, the median is the middle number.
- * even number of samples, the median is the mean of the
- * two middle numbers.
- *
- */
-static inline cycles_t get_median(int n, cycles_t delta[])
-{
- if ((n - 1) % 2)
- return (delta[n / 2] + delta[n / 2 - 1]) / 2;
- else
- return delta[n / 2];
-}
-
-static int cycles_compare(const void * aptr, const void * bptr)
-{
- const cycles_t *a = aptr;
- const cycles_t *b = bptr;
- if (*a < *b) return -1;
- if (*a > *b) return 1;
- return 0;
-}
-
-static void print_report(struct report_options * options,
- unsigned int iters, cycles_t *tstamp)
-{
- double cycles_to_units;
- cycles_t median;
- unsigned int i;
- const char* units;
- cycles_t *delta = malloc((iters - 1) * sizeof *delta);
-
- if (!delta) {
- perror("malloc");
- return;
- }
-
- for (i = 0; i < iters - 1; ++i)
- delta[i] = tstamp[i + 1] - tstamp[i];
-
-
- if (options->cycles) {
- cycles_to_units = 1;
- units = "cycles";
- } else {
- cycles_to_units = get_cpu_mhz();
- units = "usec";
- }
-
- if (options->unsorted) {
- printf("#, %s\n", units);
- for(i = 0; i < iters - 1; ++i)
- printf("%d, %g\n", i + 1, delta[i] / cycles_to_units / 2);
- }
-
- qsort(delta, iters - 1, sizeof *delta, cycles_compare);
-
- if (options->histogram) {
- printf("#, %s\n", units);
- for(i = 0; i < iters - 1; ++i)
- printf("%d, %g\n", i + 1, delta[i] / cycles_to_units / 2);
- }
-
- median = get_median(iters - 1, delta);
-
- printf("Latency typical: %g %s\n", median / cycles_to_units / 2, units);
- printf("Latency best : %g %s\n", delta[0] / cycles_to_units / 2, units);
- printf("Latency worst : %g %s\n", delta[iters - 2] / cycles_to_units / 2, units);
-
- free(delta);
-}
-
-int main(int argc, char *argv[])
-{
- const char *ib_devname = NULL;
- const char *servername = NULL;
- int iters = 1000;
- struct report_options report = {};
-
- struct pingpong_context *ctx;
-
- struct ibv_qp *qp;
- struct ibv_send_wr *wr;
- volatile char *poll_buf;
- volatile char *post_buf;
-
- int scnt, rcnt, ccnt;
-
- cycles_t *tstamp;
-
- struct pp_data data = {
- .port = 18515,
- .ib_port = 1,
- .size = 1,
- .tx_depth = 50,
- .use_cma = 0,
- .servername = NULL,
- .rem_dest = NULL,
- .ib_dev = NULL,
- .cm_channel = NULL,
- .cm_id = NULL
- };
-
- /* Parameter parsing. */
- while (1) {
- int c;
-
- static struct option long_options[] = {
- { .name = "port", .has_arg = 1, .val = 'p' },
- { .name = "ib-dev", .has_arg = 1, .val = 'd' },
- { .name = "ib-port", .has_arg = 1, .val = 'i' },
- { .name = "size", .has_arg = 1, .val = 's' },
- { .name = "iters", .has_arg = 1, .val = 'n' },
- { .name = "tx-depth", .has_arg = 1, .val = 't' },
- { .name = "inline_size", .has_arg = 1, .val = 'I' },
- { .name = "report-cycles", .has_arg = 0, .val = 'C' },
- { .name = "report-histogram",.has_arg = 0, .val = 'H' },
- { .name = "report-unsorted",.has_arg = 0, .val = 'U' },
- { .name = "cma", .has_arg = 0, .val = 'c' },
- { 0 }
- };
-
- c = getopt_long(argc, argv, "p:d:i:s:n:t:I:CHUc", long_options, NULL);
- if (c == -1)
- break;
-
- switch (c) {
- case 'p':
- data.port = strtol(optarg, NULL, 0);
- if (data.port < 0 || data.port > 65535) {
- usage(argv[0]);
- return 1;
- }
- break;
-
- case 'd':
- ib_devname = strdupa(optarg);
- break;
-
- case 'i':
- data.ib_port = strtol(optarg, NULL, 0);
- if (data.ib_port < 0) {
- usage(argv[0]);
- return 2;
- }
- break;
-
- case 's':
- data.size = strtol(optarg, NULL, 0);
- if (data.size < 1) { usage(argv[0]); return 3; }
- break;
-
- case 't':
- data.tx_depth = strtol(optarg, NULL, 0);
- if (data.tx_depth < 1) { usage(argv[0]); return 4; }
- break;
-
- case 'n':
- iters = strtol(optarg, NULL, 0);
- if (iters < 2) {
- usage(argv[0]);
- return 5;
- }
- break;
-
- case 'I':
- inline_size = strtol(optarg, NULL, 0);
- break;
-
- case 'C':
- report.cycles = 1;
- break;
-
- case 'H':
- report.histogram = 1;
- break;
-
- case 'U':
- report.unsorted = 1;
- break;
-
- case 'c':
- data.use_cma = 1;
- break;
-
- default:
- usage(argv[0]);
- return 5;
- }
- }
-
- if (optind == argc - 1) {
- if (!argv[optind]) {
- usage(argv[0]);
- return 6;
- }
- data.servername = strdupa(argv[optind]);
- }
- else if (optind < argc) {
- usage(argv[0]);
- return 7;
- }
-
- /*
- * Done with parameter parsing. Perform setup.
- */
- pid = getpid();
-
- srand48(pid * time(NULL));
- page_size = sysconf(_SC_PAGESIZE);
-
-
- if (data.use_cma) {
- data.cm_channel = rdma_create_event_channel();
- if (!data.cm_channel) {
- fprintf(stderr, "%d:%s: rdma_create_event_channel failed\n",
- pid, __func__);
- return 1;
- }
- if (rdma_create_id(data.cm_channel, &data.cm_id, NULL, RDMA_PS_TCP)) {
- fprintf(stderr, "%d:%s: rdma_create_id failed\n",
- pid, __func__);
- return 1;
- }
-
- if (data.servername) {
- ctx = pp_client_connect(&data);
- if (!ctx)
- return 1;
- } else {
- ctx = pp_server_connect(&data);
- if (!ctx)
- return 1;
- }
-
- printf("%d: Local address: LID %#04x, QPN %#06x, PSN %#06x "
- "RKey %#08x VAddr %#016Lx\n", pid,
- data.my_dest.lid, data.my_dest.qpn, data.my_dest.psn,
- data.my_dest.rkey, data.my_dest.vaddr);
-
- printf("%d: Remote address: LID %#04x, QPN %#06x, PSN %#06x, "
- "RKey %#08x VAddr %#016Lx\n\n", pid,
- data.rem_dest->lid, data.rem_dest->qpn, data.rem_dest->psn,
- data.rem_dest->rkey, data.rem_dest->vaddr);
-
- if (data.servername) {
- pp_send_start(ctx);
- } else {
- pp_wait_for_start(ctx);
- }
-
- } else {
- data.ib_dev = pp_find_dev(ib_devname);
- if (!data.ib_dev)
- return 7;
-
- if (data.servername) {
- ctx = pp_client_connect(&data);
- if (!ctx)
- return 8;
- } else {
- ctx = pp_server_connect(&data);
- if (!ctx)
- return 8;
- }
-
- if (pp_open_port(ctx, &data))
- return 9;
- }
- wr = &ctx->wr;
- ctx->list.addr = (uintptr_t) ctx->buf;
- ctx->list.length = ctx->size;
- ctx->list.lkey = ctx->mr->lkey;
- wr->wr.rdma.remote_addr = data.rem_dest->vaddr;
- wr->wr.rdma.rkey = data.rem_dest->rkey;
- ctx->wr.wr_id = PINGPONG_RDMA_WRID;
- ctx->wr.sg_list = &ctx->list;
- ctx->wr.num_sge = 1;
- ctx->wr.opcode = IBV_WR_RDMA_WRITE;
- if (ctx->size > inline_size || ctx->size == 0) {
- ctx->wr.send_flags = IBV_SEND_SIGNALED;
- } else {
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
- }
- ctx->wr.next = NULL;
-
- scnt = 0;
- rcnt = 0;
- ccnt = 0;
- poll_buf = ctx->poll_buf;
- post_buf = ctx->post_buf;
- qp = ctx->qp;
-
- tstamp = malloc(iters * sizeof *tstamp);
- if (!tstamp) {
- perror("malloc");
- return 10;
- }
-
- /* Done with setup. Start the test. */
-
- while (scnt < iters || ccnt < iters || rcnt < iters) {
-
- /* Wait till buffer changes. */
- if (rcnt < iters && !(scnt < 1 && data.servername)) {
- ++rcnt;
- while (*poll_buf != (char)rcnt)
- ;
- /* Here the data is already in the physical memory.
- If we wanted to actually use it, we may need
- a read memory barrier here. */
- }
-
- if (scnt < iters) {
- struct ibv_send_wr *bad_wr;
- tstamp[scnt] = get_cycles();
-
- *post_buf = (char)++scnt;
- if (ibv_post_send(qp, wr, &bad_wr)) {
- fprintf(stderr, "Couldn't post send: scnt=%d\n",
- scnt);
- return 11;
- }
- }
-
- if (ccnt < iters) {
- struct ibv_wc wc;
- int ne;
- ++ccnt;
- do {
- ne = ibv_poll_cq(ctx->scq, 1, &wc);
- } while (ne == 0);
-
- if (ne < 0) {
- fprintf(stderr, "poll CQ failed %d\n", ne);
- return 12;
- }
- if (wc.status != IBV_WC_SUCCESS) {
- fprintf(stderr, "Completion wth error at %s:\n",
- servername ? "client" : "server");
- fprintf(stderr, "Failed status %d: wr_id %d\n",
- wc.status, (int) wc.wr_id);
- fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
- scnt, rcnt, ccnt);
- return 13;
- }
- }
- }
- if (data.use_cma) {
- pp_send_done(ctx);
- pp_wait_for_done(ctx);
- pp_close_cma(data);
- }
-
- print_report(&report, iters, tstamp);
- return 0;
-}
+/*\r
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.\r
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.\r
+ * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)\r
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ * Redistribution and use in source and binary forms, with or\r
+ * without modification, are permitted provided that the following\r
+ * conditions are met:\r
+ *\r
+ * - Redistributions of source code must retain the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer.\r
+ *\r
+ * - Redistributions in binary form must reproduce the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer in the documentation and/or other materials\r
+ * provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ */\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <ws2tcpip.h>\r
+#include <winsock2.h>\r
+#include <time.h>\r
+\r
+#include "..\..\..\etc\user\getopt.c"\r
+#include <infiniband/verbs.h>\r
+#include <rdma/rdma_cma.h>\r
+\r
+/* wr_id stamped on the RDMA write work request that main() posts. */\r
+#define PINGPONG_RDMA_WRID 3\r
+/* Initial value of inline_size; the usage text lists it as the -I default. */\r
+#define MAX_INLINE 400\r
+\r
+/*\r
+ * Inline threshold. pp_init_ctx() requests it as the QP's max_inline_data,\r
+ * and main() sets IBV_SEND_INLINE only when the message size is non-zero and\r
+ * does not exceed it. Overridden by the -I option.\r
+ */\r
+static int inline_size = MAX_INLINE;\r
+\r
+/* Output switches consumed by print_report(); set from -U, -H and -C. */\r
+struct report_options {\r
+ int unsorted; /* print every sample before the samples are sorted */\r
+ int histogram; /* print every sample after the samples are sorted */\r
+ int cycles; /* report delta's in cycles, not microsec's */\r
+};\r
+\r
+\r
+/*\r
+ * Per-connection verbs state filled in by pp_init_ctx(): device context, PD,\r
+ * MR, both CQs, the QP, and the ping-pong buffer with its two flag bytes.\r
+ */\r
+struct pingpong_context {\r
+ struct ibv_context *context;\r
+ struct ibv_pd *pd;\r
+ struct ibv_mr *mr; /* registered over all 2 * size bytes of buf */\r
+ struct ibv_cq *rcq; /* recv CQ; polled by pp_wait_for_start/done */\r
+ struct ibv_cq *scq; /* send CQ; created with tx_depth entries */\r
+ struct ibv_qp *qp;\r
+ void *buf; /* 2 * size bytes, zero-filled when allocated */\r
+ volatile char *post_buf; /* buf + size - 1: byte main() stamps before each post */\r
+ volatile char *poll_buf; /* buf + 2 * size - 1: byte main() spins on */\r
+ int size;\r
+ int tx_depth;\r
+ struct ibv_sge list; /* the single SGE that wr points at */\r
+ struct ibv_send_wr wr; /* reused for the start/done sends and the RDMA writes */\r
+};\r
+\r
+/*\r
+ * Addressing information one side advertises to the other. It is sent as text\r
+ * by pp_write_keys() on the socket path, or copied as raw bytes to and from\r
+ * the CM private data on the rdma_cm path.\r
+ * NOTE(review): the raw copy presumably requires both peers to share this\r
+ * layout and byte order - confirm.\r
+ */\r
+struct pingpong_dest {\r
+ int lid;\r
+ int qpn;\r
+ int psn; /* filled with rand() & 0xffffff */\r
+ unsigned rkey; /* rkey of the advertising side's MR */\r
+ unsigned long long vaddr; /* advertising side's buf + size */\r
+};\r
+\r
+/* Command-line settings plus connection-setup state shared by the pp_* helpers. */\r
+struct pp_data {\r
+ int port; /* TCP / rdma_cm port; main() defaults it to 18515 */\r
+ int ib_port; /* IB device port; main() defaults it to 1 */\r
+ unsigned size; /* message size in bytes; main() defaults it to 1 */\r
+ int tx_depth; /* main() defaults it to 50 */\r
+ int use_cma; /* non-zero: connect through rdma_cm instead of a socket */\r
+ SOCKET sockfd; /* connected socket, set on the socket path only */\r
+ char *servername; /* NULL when running as the server */\r
+ struct pingpong_dest my_dest;\r
+ struct pingpong_dest *rem_dest; /* malloc'ed; NULL until the peer's data arrives */\r
+ struct ibv_device *ib_dev; /* set by main() on the socket path */\r
+ struct rdma_event_channel *cm_channel; /* created by main() when use_cma is set */\r
+ struct rdma_cm_id *cm_id; /* created by main() when use_cma is set */\r
+\r
+};\r
+\r
+/* Forward declarations for helpers that are called before they are defined. */\r
+static void pp_post_recv(struct pingpong_context *);\r
+static void pp_wait_for_done(struct pingpong_context *);\r
+static void pp_send_done(struct pingpong_context *);\r
+static void pp_wait_for_start(struct pingpong_context *);\r
+static void pp_send_start(struct pingpong_context *);\r
+static void pp_close_cma(struct pp_data );\r
+static struct pingpong_context *pp_init_ctx(void *, struct pp_data *);\r
+\r
+/* Tick count type used for the latency timestamps. */\r
+typedef UINT64 cycles_t;\r
+\r
+/*\r
+ * Read the current value of the performance counter.\r
+ * The result of QueryPerformanceCounter() itself is not checked.\r
+ */\r
+static __inline UINT64 get_cycles()\r
+{\r
+    LARGE_INTEGER now;\r
+\r
+    QueryPerformanceCounter(&now);\r
+    return now.QuadPart;\r
+}\r
+\r
+/*\r
+ * Read the performance counter frequency, i.e. the unit in which\r
+ * get_cycles() ticks. The result of QueryPerformanceFrequency() itself is\r
+ * not checked.\r
+ */\r
+static __inline UINT64 get_freq()\r
+{\r
+    LARGE_INTEGER ticks_per_unit;\r
+\r
+    QueryPerformanceFrequency(&ticks_per_unit);\r
+    return ticks_per_unit.QuadPart;\r
+}\r
+\r
+/*\r
+ * Return the LID of device port `port`, or 0 when the port query fails.\r
+ */\r
+static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
+{\r
+    struct ibv_port_attr port_info;\r
+    int failed;\r
+\r
+    failed = ibv_query_port(ctx->context, (uint8_t) port, &port_info);\r
+    return failed ? 0 : port_info.lid;\r
+}\r
+\r
+/*\r
+ * Look up the IB device to use.\r
+ *\r
+ * ib_devname: device name to match, or NULL to take the first device.\r
+ * Returns the device, or NULL (after printing a message) when the device\r
+ * list cannot be obtained, is empty, or holds no device of that name.\r
+ *\r
+ * The device list is not freed here.\r
+ * NOTE(review): the returned device is presumably only valid while the list\r
+ * is alive (see ibv_free_device_list) - confirm before adding a free.\r
+ */\r
+static struct ibv_device *pp_find_dev(const char *ib_devname)\r
+{\r
+    struct ibv_device **dev_list;\r
+    struct ibv_device *ib_dev = NULL;\r
+\r
+    dev_list = ibv_get_device_list(NULL);\r
+    if (!dev_list) {\r
+        /* Without this check the lookups below dereference NULL. */\r
+        fprintf(stderr, "Failed to get IB devices list\n");\r
+        return NULL;\r
+    }\r
+\r
+    if (!ib_devname) {\r
+        ib_dev = dev_list[0];\r
+        if (!ib_dev)\r
+            fprintf(stderr, "No IB devices found\n");\r
+    } else {\r
+        /* The list is NULL-terminated; ib_dev ends up NULL if nothing matches. */\r
+        for (; (ib_dev = *dev_list); ++dev_list) {\r
+            if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))\r
+                break;\r
+        }\r
+        if (!ib_dev)\r
+            fprintf(stderr, "IB device %s not found\n", ib_devname);\r
+    }\r
+    return ib_dev;\r
+}\r
+\r
+/*\r
+ * Text form of one pingpong_dest as written by pp_write_keys() and parsed by\r
+ * pp_read_keys(): lid:qpn:psn:rkey:vaddr as zero-padded hex fields.\r
+ * KEY_MSG_SIZE is the size of a sample message, terminating NUL included.\r
+ * NOTE(review): "%016Lx" is paired with an unsigned long long argument; the L\r
+ * length modifier is not defined for integer conversions in ISO C - confirm\r
+ * the target CRT treats it as 64-bit.\r
+ */\r
+#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")\r
+#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"\r
+\r
+/*\r
+ * Format `my_dest` with KEY_PRINT_FMT and send it over `sockfd` as one\r
+ * KEY_MSG_SIZE-byte message.\r
+ * Returns 0 when the whole message was sent, -1 otherwise.\r
+ */\r
+static int pp_write_keys(SOCKET sockfd, const struct pingpong_dest *my_dest)\r
+{\r
+    char wire[KEY_MSG_SIZE];\r
+\r
+    sprintf(wire, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,\r
+            my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
+\r
+    if (send(sockfd, wire, sizeof wire, 0) == sizeof wire)\r
+        return 0;\r
+\r
+    /* Short or failed send. */\r
+    perror("client send");\r
+    fprintf(stderr, "Couldn't send local address\n");\r
+    return -1;\r
+}\r
+\r
+/*\r
+ * Receive one KEY_MSG_SIZE-byte message from `sockfd` and parse it into\r
+ * `rem_dest` using KEY_PRINT_FMT. `my_dest` is not used.\r
+ * Returns 0 on success, -1 when the receive does not deliver exactly\r
+ * KEY_MSG_SIZE bytes or fewer than five fields parse.\r
+ */\r
+static int pp_read_keys(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
+                        struct pingpong_dest *rem_dest)\r
+{\r
+    char wire[KEY_MSG_SIZE];\r
+    int fields;\r
+\r
+    if (recv(sockfd, wire, sizeof wire, 0) != sizeof wire) {\r
+        perror("pp_read_keys");\r
+        fprintf(stderr, "Couldn't recv remote address\n");\r
+        return -1;\r
+    }\r
+\r
+    fields = sscanf(wire, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,\r
+                    &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
+    if (fields == 5)\r
+        return 0;\r
+\r
+    fprintf(stderr, "Couldn't parse line <%.*s>\n",\r
+            (int)sizeof wire, wire);\r
+    return -1;\r
+}\r
+\r
+/*\r
+ * Client side of connection setup.\r
+ *\r
+ * With data->use_cma set: resolve address and route through rdma_cm, build\r
+ * the context on data->cm_id, connect with my_dest as private data and store\r
+ * the server's private data in a freshly allocated data->rem_dest.\r
+ * Otherwise: open a TCP connection to data->servername:data->port, build the\r
+ * context on data->ib_dev and store the socket in data->sockfd.\r
+ *\r
+ * Returns the new context, or NULL on failure. On an rdma_cm failure\r
+ * data->cm_id and data->cm_channel are destroyed before returning.\r
+ */\r
+static struct pingpong_context *pp_client_connect(struct pp_data *data)\r
+{\r
+    struct addrinfo *res = NULL, *t;\r
+    struct addrinfo hints;\r
+    char service[6];\r
+    int n;\r
+    SOCKET sockfd = INVALID_SOCKET;\r
+    struct rdma_cm_event *event;\r
+    struct sockaddr_in sin;\r
+    struct pingpong_context *ctx = NULL;\r
+    struct rdma_conn_param conn_param;\r
+\r
+    memset(&hints, 0, sizeof hints);\r
+    hints.ai_flags = AI_PASSIVE;\r
+    hints.ai_family = AF_UNSPEC;\r
+    hints.ai_socktype = SOCK_STREAM;\r
+    sprintf(service, "%d", data->port);\r
+\r
+    /* getaddrinfo reports failure with a non-zero code, which need not be\r
+     * negative; testing "< 0" would let a failure through with res unset. */\r
+    n = getaddrinfo(data->servername, service, &hints, &res);\r
+    if (n != 0) {\r
+        fprintf(stderr, "%s for %s:%d\n", gai_strerror(n),\r
+                data->servername, data->port);\r
+        goto err4;\r
+    }\r
+\r
+    if (data->use_cma) {\r
+        /* NOTE(review): the first result is read as a sockaddr_in although\r
+         * the lookup uses AF_UNSPEC - confirm it is always an IPv4 entry. */\r
+        memset(&sin, 0, sizeof sin);\r
+        sin.sin_addr.s_addr = ((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr;\r
+        sin.sin_family = AF_INET;\r
+        sin.sin_port = htons((u_short) data->port);\r
+        if (rdma_resolve_addr(data->cm_id, NULL,\r
+                              (struct sockaddr *)&sin, 2000)) {\r
+            fprintf(stderr, "rdma_resolve_addr failed\n");\r
+            goto err2;\r
+        }\r
+\r
+        if (rdma_get_cm_event(data->cm_channel, &event))\r
+            goto err2;\r
+\r
+        if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {\r
+            fprintf(stderr, "unexpected CM event resolving addr %d\n", event->event);\r
+            goto err1;\r
+        }\r
+        rdma_ack_cm_event(event);\r
+\r
+        if (rdma_resolve_route(data->cm_id, 2000)) {\r
+            fprintf(stderr, "rdma_resolve_route failed\n");\r
+            goto err2;\r
+        }\r
+\r
+        if (rdma_get_cm_event(data->cm_channel, &event))\r
+            goto err2;\r
+\r
+        if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {\r
+            fprintf(stderr, "unexpected CM event resolving route %d\n", event->event);\r
+            /* err1 acks the event; acking here too would ack it twice. */\r
+            goto err1;\r
+        }\r
+        rdma_ack_cm_event(event);\r
+\r
+        ctx = pp_init_ctx(data->cm_id, data);\r
+        if (!ctx) {\r
+            fprintf(stderr, "pp_init_ctx failed\n");\r
+            goto err2;\r
+        }\r
+        data->my_dest.psn = rand() & 0xffffff;\r
+        data->my_dest.qpn = 0;\r
+        data->my_dest.rkey = ctx->mr->rkey;\r
+        data->my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;\r
+\r
+        memset(&conn_param, 0, sizeof conn_param);\r
+        conn_param.responder_resources = 1;\r
+        conn_param.initiator_depth = 1;\r
+        conn_param.retry_count = 5;\r
+        conn_param.private_data = &data->my_dest;\r
+        conn_param.private_data_len = sizeof(data->my_dest);\r
+\r
+        if (rdma_connect(data->cm_id, &conn_param)) {\r
+            fprintf(stderr, "rdma_connect failure\n");\r
+            goto err2;\r
+        }\r
+\r
+        if (rdma_get_cm_event(data->cm_channel, &event))\r
+            goto err2;\r
+\r
+        if (event->event != RDMA_CM_EVENT_ESTABLISHED) {\r
+            fprintf(stderr, "unexpected CM event connecting %d\n", event->event);\r
+            goto err1;\r
+        }\r
+        if (!event->param.conn.private_data ||\r
+            (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {\r
+            fprintf(stderr, "bad private data ptr %p len %d\n",\r
+                    event->param.conn.private_data,\r
+                    event->param.conn.private_data_len);\r
+            goto err1;\r
+        }\r
+        data->rem_dest = malloc(sizeof *data->rem_dest);\r
+        if (!data->rem_dest)\r
+            goto err1;\r
+\r
+        /* Copy the server's addressing data out before the event is acked. */\r
+        memcpy(data->rem_dest, event->param.conn.private_data, sizeof(*data->rem_dest));\r
+        rdma_ack_cm_event(event);\r
+    } else {\r
+        /* Try each resolved address until one connects. */\r
+        for (t = res; t; t = t->ai_next) {\r
+            sockfd = socket(t->ai_family, t->ai_socktype,\r
+                            t->ai_protocol);\r
+            if (sockfd != INVALID_SOCKET) {\r
+                if (!connect(sockfd, t->ai_addr, t->ai_addrlen))\r
+                    break;\r
+                closesocket(sockfd);\r
+                sockfd = INVALID_SOCKET;\r
+            }\r
+        }\r
+        if (sockfd == INVALID_SOCKET) {\r
+            fprintf(stderr, "Couldn't connect to %s:%d\n",\r
+                    data->servername, data->port);\r
+            goto err3;\r
+        }\r
+        ctx = pp_init_ctx(data->ib_dev, data);\r
+        if (!ctx) {\r
+            /* Do not leak the connected socket on failure. */\r
+            closesocket(sockfd);\r
+            goto err3;\r
+        }\r
+        data->sockfd = sockfd;\r
+    }\r
+\r
+    freeaddrinfo(res);\r
+    return ctx;\r
+\r
+err1:\r
+    rdma_ack_cm_event(event);\r
+err2:\r
+    rdma_destroy_id(data->cm_id);\r
+    rdma_destroy_event_channel(data->cm_channel);\r
+err3:\r
+    freeaddrinfo(res);\r
+err4:\r
+    return NULL;\r
+\r
+}\r
+\r
+\r
+/*\r
+ * Client half of the socket handshake: replace data->rem_dest with a fresh\r
+ * allocation, send my_dest, then read the peer's reply into rem_dest.\r
+ * Returns 0 on success, -1 on allocation, send or receive failure.\r
+ */\r
+static int pp_client_exch_dest(struct pp_data *data)\r
+{\r
+    /* free(NULL) is a no-op, so no guard is needed. */\r
+    free(data->rem_dest);\r
+\r
+    data->rem_dest = malloc(sizeof *data->rem_dest);\r
+    if (data->rem_dest == NULL)\r
+        return -1;\r
+\r
+    if (pp_write_keys(data->sockfd, &data->my_dest) != 0)\r
+        return -1;\r
+\r
+    return pp_read_keys(data->sockfd, &data->my_dest, data->rem_dest);\r
+}\r
+\r
+/*\r
+ * Server side of connection setup.\r
+ *\r
+ * With data->use_cma set: bind and listen on data->cm_id, wait for a connect\r
+ * request, copy the client's private data into a freshly allocated\r
+ * data->rem_dest, build the context on the new child id, accept with my_dest\r
+ * as private data and wait for the ESTABLISHED event.\r
+ * Otherwise: bind a TCP socket on data->port, accept one connection, build\r
+ * the context on data->ib_dev and store the accepted socket in data->sockfd.\r
+ *\r
+ * Returns the new context, or NULL on failure. On an rdma_cm failure\r
+ * data->cm_id and data->cm_channel are destroyed before returning.\r
+ */\r
+static struct pingpong_context *pp_server_connect(struct pp_data *data)\r
+{\r
+    struct addrinfo *res = NULL, *t;\r
+    struct addrinfo hints;\r
+    char service[6];\r
+    SOCKET sockfd = INVALID_SOCKET, connfd;\r
+    int n;\r
+    struct rdma_cm_event *event;\r
+    struct sockaddr_in sin;\r
+    struct pingpong_context *ctx = NULL;\r
+    struct rdma_cm_id *child_cm_id;\r
+    struct rdma_conn_param conn_param;\r
+\r
+    memset(&hints, 0, sizeof hints);\r
+    hints.ai_flags = AI_PASSIVE;\r
+    hints.ai_family = AF_UNSPEC;\r
+    hints.ai_socktype = SOCK_STREAM;\r
+    sprintf(service, "%d", data->port);\r
+\r
+    /* getaddrinfo reports failure with a non-zero code, which need not be\r
+     * negative; testing "< 0" would let a failure through with res unset. */\r
+    n = getaddrinfo(NULL, service, &hints, &res);\r
+    if (n != 0) {\r
+        fprintf(stderr, "%s for port %d\n", gai_strerror(n), data->port);\r
+        goto err5;\r
+    }\r
+\r
+    if (data->use_cma) {\r
+        memset(&sin, 0, sizeof sin);\r
+        sin.sin_addr.s_addr = 0;\r
+        sin.sin_family = AF_INET;\r
+        sin.sin_port = htons((u_short) data->port);\r
+        if (rdma_bind_addr(data->cm_id, (struct sockaddr *)&sin)) {\r
+            fprintf(stderr, "rdma_bind_addr failed\n");\r
+            goto err3;\r
+        }\r
+\r
+        if (rdma_listen(data->cm_id, 0)) {\r
+            fprintf(stderr, "rdma_listen failed\n");\r
+            goto err3;\r
+        }\r
+\r
+        if (rdma_get_cm_event(data->cm_channel, &event))\r
+            goto err3;\r
+\r
+        if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {\r
+            fprintf(stderr, "bad event waiting for connect request %d\n", event->event);\r
+            goto err2;\r
+        }\r
+\r
+        if (!event->param.conn.private_data ||\r
+            (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {\r
+            fprintf(stderr, "bad private data len %d\n",\r
+                    event->param.conn.private_data_len);\r
+            goto err2;\r
+        }\r
+\r
+        data->rem_dest = malloc(sizeof *data->rem_dest);\r
+        if (!data->rem_dest)\r
+            goto err2;\r
+\r
+        /* Copy the client's addressing data out before the event is acked. */\r
+        memcpy(data->rem_dest, event->param.conn.private_data, sizeof(*data->rem_dest));\r
+\r
+        child_cm_id = (struct rdma_cm_id *)event->id;\r
+        ctx = pp_init_ctx(child_cm_id, data);\r
+        if (!ctx) {\r
+            free(data->rem_dest);\r
+            /* Do not leave a dangling pointer behind for the caller. */\r
+            data->rem_dest = NULL;\r
+            goto err1;\r
+        }\r
+        data->my_dest.psn = rand() & 0xffffff;\r
+        data->my_dest.qpn = 0;\r
+        data->my_dest.rkey = ctx->mr->rkey;\r
+        data->my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;\r
+\r
+        memset(&conn_param, 0, sizeof conn_param);\r
+        conn_param.responder_resources = 1;\r
+        conn_param.initiator_depth = 1;\r
+        conn_param.private_data = &data->my_dest;\r
+        conn_param.private_data_len = sizeof(data->my_dest);\r
+        if (rdma_accept(child_cm_id, &conn_param)) {\r
+            fprintf(stderr, "rdma_accept failed\n");\r
+            goto err1;\r
+        }\r
+        rdma_ack_cm_event(event);\r
+        if (rdma_get_cm_event(data->cm_channel, &event)) {\r
+            fprintf(stderr, "rdma_get_cm_event error\n");\r
+            /* No event is pending here, so skip the ack done at err2. */\r
+            rdma_destroy_id(child_cm_id);\r
+            goto err3;\r
+        }\r
+        if (event->event != RDMA_CM_EVENT_ESTABLISHED) {\r
+            fprintf(stderr, "bad event waiting for established %d\n", event->event);\r
+            goto err1;\r
+        }\r
+        rdma_ack_cm_event(event);\r
+    } else {\r
+        /* Try each resolved address until one binds. */\r
+        for (t = res; t; t = t->ai_next) {\r
+            sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);\r
+            if (sockfd != INVALID_SOCKET) {\r
+                if (!bind(sockfd, t->ai_addr, t->ai_addrlen))\r
+                    break;\r
+                closesocket(sockfd);\r
+                sockfd = INVALID_SOCKET;\r
+            }\r
+        }\r
+\r
+        if (sockfd == INVALID_SOCKET) {\r
+            fprintf(stderr, "Couldn't listen to port %d\n", data->port);\r
+            goto err4;\r
+        }\r
+\r
+        /* A listen() failure is not checked here; it shows up as an accept() failure. */\r
+        listen(sockfd, 1);\r
+        connfd = accept(sockfd, NULL, 0);\r
+        if (connfd == INVALID_SOCKET) {\r
+            perror("server accept");\r
+            fprintf(stderr, "accept() failed\n");\r
+            closesocket(sockfd);\r
+            goto err4;\r
+        }\r
+\r
+        /* Only one client is served; the listening socket is done. */\r
+        closesocket(sockfd);\r
+\r
+        ctx = pp_init_ctx(data->ib_dev, data);\r
+        if (!ctx) {\r
+            /* Do not leak the accepted socket on failure. */\r
+            closesocket(connfd);\r
+            goto err4;\r
+        }\r
+        data->sockfd = connfd;\r
+    }\r
+    freeaddrinfo(res);\r
+    return ctx;\r
+\r
+err1:\r
+    rdma_destroy_id(child_cm_id);\r
+err2:\r
+    rdma_ack_cm_event(event);\r
+err3:\r
+    rdma_destroy_id(data->cm_id);\r
+    rdma_destroy_event_channel(data->cm_channel);\r
+err4:\r
+    freeaddrinfo(res);\r
+err5:\r
+    return NULL;\r
+\r
+}\r
+\r
+/*\r
+ * Server half of the socket handshake: replace data->rem_dest with a fresh\r
+ * allocation, read the peer's data into it, then send my_dest back.\r
+ * Returns 0 on success, -1 on allocation, receive or send failure.\r
+ */\r
+static int pp_server_exch_dest(struct pp_data *data)\r
+{\r
+    /* free(NULL) is a no-op, so no guard is needed. */\r
+    free(data->rem_dest);\r
+\r
+    data->rem_dest = malloc(sizeof *data->rem_dest);\r
+    if (data->rem_dest == NULL)\r
+        return -1;\r
+\r
+    if (pp_read_keys(data->sockfd, &data->my_dest, data->rem_dest) != 0)\r
+        return -1;\r
+\r
+    return pp_write_keys(data->sockfd, &data->my_dest);\r
+}\r
+\r
+/*\r
+ * Allocate and initialise a pingpong_context.\r
+ *\r
+ * ptr:  a struct rdma_cm_id * when data->use_cma is set, otherwise a\r
+ *       struct ibv_device *.\r
+ * data: supplies size, tx_depth, ib_port and use_cma.\r
+ *\r
+ * Allocates a zeroed buffer of 2 * size bytes, registers it, and creates the\r
+ * PD, both CQs and an RC QP. On the rdma_cm path the QP is created on the\r
+ * cm_id and one receive is posted; on the socket path the device is opened\r
+ * here and the QP is moved to INIT.\r
+ *\r
+ * Returns the context, or NULL on failure; everything acquired up to the\r
+ * point of failure is released again.\r
+ */\r
+static struct pingpong_context *pp_init_ctx(void *ptr, struct pp_data *data)\r
+{\r
+    struct pingpong_context *ctx;\r
+    struct ibv_device *ib_dev;\r
+    struct rdma_cm_id *cm_id = NULL;\r
+    struct ibv_qp_init_attr attr;\r
+\r
+    ctx = malloc(sizeof *ctx);\r
+    if (!ctx)\r
+        return NULL;\r
+\r
+    ctx->size = data->size;\r
+    ctx->tx_depth = data->tx_depth;\r
+\r
+    ctx->buf = malloc(ctx->size * 2);\r
+    if (!ctx->buf) {\r
+        fprintf(stderr, "Couldn't allocate work buf.\n");\r
+        goto err_ctx;\r
+    }\r
+\r
+    memset(ctx->buf, 0, ctx->size * 2);\r
+\r
+    /* Flag bytes: the last byte of the first half and of the second half. */\r
+    ctx->post_buf = (char *)ctx->buf + (ctx->size -1);\r
+    ctx->poll_buf = (char *)ctx->buf + (2 * ctx->size -1);\r
+\r
+    if (data->use_cma) {\r
+        cm_id = (struct rdma_cm_id *)ptr;\r
+        ctx->context = cm_id->verbs;\r
+        if (!ctx->context) {\r
+            fprintf(stderr, "Unbound cm_id!!\n");\r
+            goto err_buf;\r
+        }\r
+\r
+    } else {\r
+        ib_dev = (struct ibv_device *)ptr;\r
+        ctx->context = ibv_open_device(ib_dev);\r
+        if (!ctx->context) {\r
+            fprintf(stderr, "Couldn't get context for %s\n",\r
+                    ibv_get_device_name(ib_dev));\r
+            goto err_buf;\r
+        }\r
+    }\r
+\r
+    ctx->pd = ibv_alloc_pd(ctx->context);\r
+    if (!ctx->pd) {\r
+        fprintf(stderr, "Couldn't allocate PD\n");\r
+        goto err_dev;\r
+    }\r
+\r
+    /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:\r
+     * The Consumer is not allowed to assign Remote Write or Remote Atomic to\r
+     * a Memory Region that has not been assigned Local Write. */\r
+    ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, ctx->size * 2,\r
+                         IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
+    if (!ctx->mr) {\r
+        fprintf(stderr, "Couldn't allocate MR\n");\r
+        goto err_pd;\r
+    }\r
+\r
+    ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);\r
+    if (!ctx->rcq) {\r
+        /* The message used to start with a stray '%', which printf read as\r
+         * a conversion specification with no matching argument. */\r
+        fprintf(stderr, "Couldn't create recv CQ\n");\r
+        goto err_mr;\r
+    }\r
+\r
+    ctx->scq = ibv_create_cq(ctx->context, ctx->tx_depth, ctx, NULL, 0);\r
+    if (!ctx->scq) {\r
+        fprintf(stderr, "Couldn't create send CQ\n");\r
+        goto err_rcq;\r
+    }\r
+\r
+    memset(&attr, 0, sizeof attr);\r
+    attr.send_cq = ctx->scq;\r
+    attr.recv_cq = ctx->rcq;\r
+    attr.cap.max_send_wr = ctx->tx_depth;\r
+    attr.cap.max_recv_wr = 1;\r
+    attr.cap.max_send_sge = 1;\r
+    attr.cap.max_recv_sge = 1;\r
+    attr.cap.max_inline_data = inline_size;\r
+    attr.qp_type = IBV_QPT_RC;\r
+\r
+    if (data->use_cma) {\r
+        if (rdma_create_qp(cm_id, ctx->pd, &attr)) {\r
+            fprintf(stderr, "Couldn't create QP\n");\r
+            goto err_scq;\r
+        }\r
+        ctx->qp = cm_id->qp;\r
+        /* Pre-post the receive that will take the peer's start/done send. */\r
+        pp_post_recv(ctx);\r
+    } else {\r
+        struct ibv_qp_attr init_attr;\r
+\r
+        ctx->qp = ibv_create_qp(ctx->pd, &attr);\r
+        if (!ctx->qp) {\r
+            fprintf(stderr, "Couldn't create QP\n");\r
+            goto err_scq;\r
+        }\r
+\r
+        /* Only the fields named in the mask below are set. */\r
+        init_attr.qp_state = IBV_QPS_INIT;\r
+        init_attr.pkey_index = 0;\r
+        init_attr.port_num = (uint8_t) data->ib_port;\r
+        init_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;\r
+\r
+        if (ibv_modify_qp(ctx->qp, &init_attr,\r
+                          IBV_QP_STATE |\r
+                          IBV_QP_PKEY_INDEX |\r
+                          IBV_QP_PORT |\r
+                          IBV_QP_ACCESS_FLAGS)) {\r
+            fprintf(stderr, "Failed to modify QP to INIT\n");\r
+            ibv_destroy_qp(ctx->qp);\r
+            goto err_scq;\r
+        }\r
+    }\r
+\r
+    return ctx;\r
+\r
+err_scq:\r
+    ibv_destroy_cq(ctx->scq);\r
+err_rcq:\r
+    ibv_destroy_cq(ctx->rcq);\r
+err_mr:\r
+    ibv_dereg_mr(ctx->mr);\r
+err_pd:\r
+    ibv_dealloc_pd(ctx->pd);\r
+err_dev:\r
+    /* On the rdma_cm path the device context was taken from the cm_id, not\r
+     * opened here, so it is left alone. */\r
+    if (!data->use_cma)\r
+        ibv_close_device(ctx->context);\r
+err_buf:\r
+    free(ctx->buf);\r
+err_ctx:\r
+    free(ctx);\r
+    return NULL;\r
+}\r
+\r
+/*\r
+ * Move the QP to RTR and then to RTS, using the peer's qpn, psn and lid from\r
+ * data->rem_dest and the local psn from data->my_dest.\r
+ * Returns 0 on success, 1 if either transition fails.\r
+ */\r
+static int pp_connect_ctx(struct pingpong_context *ctx, struct pp_data *data)\r
+{\r
+    struct ibv_qp_attr attr;\r
+\r
+    memset(&attr, 0, sizeof attr);\r
+\r
+    /* Receive side. */\r
+    attr.qp_state = IBV_QPS_RTR;\r
+    attr.path_mtu = IBV_MTU_256;\r
+    attr.dest_qp_num = data->rem_dest->qpn;\r
+    attr.rq_psn = data->rem_dest->psn;\r
+    attr.max_dest_rd_atomic = 1;\r
+    attr.min_rnr_timer = 12;\r
+    attr.ah_attr.is_global = 0;\r
+    attr.ah_attr.dlid = (uint16_t) data->rem_dest->lid;\r
+    attr.ah_attr.sl = 0;\r
+    attr.ah_attr.src_path_bits = 0;\r
+    attr.ah_attr.port_num = (uint8_t) data->ib_port;\r
+\r
+    if (ibv_modify_qp(ctx->qp, &attr,\r
+                      IBV_QP_STATE |\r
+                      IBV_QP_AV |\r
+                      IBV_QP_PATH_MTU |\r
+                      IBV_QP_DEST_QPN |\r
+                      IBV_QP_RQ_PSN |\r
+                      IBV_QP_MAX_DEST_RD_ATOMIC |\r
+                      IBV_QP_MIN_RNR_TIMER) != 0) {\r
+        fprintf(stderr, "Failed to modify QP to RTR\n");\r
+        return 1;\r
+    }\r
+\r
+    /* Send side. */\r
+    attr.qp_state = IBV_QPS_RTS;\r
+    attr.timeout = 14;\r
+    attr.retry_cnt = 7;\r
+    attr.rnr_retry = 7;\r
+    attr.sq_psn = data->my_dest.psn;\r
+    attr.max_rd_atomic = 1;\r
+\r
+    if (ibv_modify_qp(ctx->qp, &attr,\r
+                      IBV_QP_STATE |\r
+                      IBV_QP_TIMEOUT |\r
+                      IBV_QP_RETRY_CNT |\r
+                      IBV_QP_RNR_RETRY |\r
+                      IBV_QP_SQ_PSN |\r
+                      IBV_QP_MAX_QP_RD_ATOMIC) != 0) {\r
+        fprintf(stderr, "Failed to modify QP to RTS\n");\r
+        return 1;\r
+    }\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * Socket-path connection setup: fill in data->my_dest, swap addressing data\r
+ * with the peer over data->sockfd, bring the QP to RTS, handshake once more,\r
+ * send "done" and close the socket.\r
+ * Returns 0 on success and a non-zero value (1 or -1) on failure.\r
+ */\r
+static int pp_open_port(struct pingpong_context *ctx, struct pp_data *data )\r
+{\r
+    char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";\r
+    /* The client writes first and the server reads first. */\r
+    int (*exchange)(struct pp_data *) =\r
+        data->servername ? pp_client_exch_dest : pp_server_exch_dest;\r
+    struct pingpong_dest *local = &data->my_dest;\r
+\r
+    /* Create connection between client and server.\r
+     * We do it by exchanging data over a TCP socket connection. */\r
+\r
+    local->lid = pp_get_local_lid(ctx, data->ib_port);\r
+    local->qpn = ctx->qp->qp_num;\r
+    local->psn = rand() & 0xffffff;\r
+    if (local->lid == 0) {\r
+        fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
+        return -1;\r
+    }\r
+    local->rkey = ctx->mr->rkey;\r
+    local->vaddr = (uintptr_t)ctx->buf + ctx->size;\r
+\r
+    printf(addr_fmt, "local", local->lid, local->qpn, local->psn,\r
+           local->rkey, local->vaddr);\r
+\r
+    if (exchange(data) != 0)\r
+        return 1;\r
+\r
+    /* rem_dest is reallocated by every exchange, so read it only now. */\r
+    printf(addr_fmt, "remote", data->rem_dest->lid, data->rem_dest->qpn,\r
+           data->rem_dest->psn, data->rem_dest->rkey,\r
+           data->rem_dest->vaddr);\r
+\r
+    if (pp_connect_ctx(ctx, data) != 0)\r
+        return 1;\r
+\r
+    /* An additional handshake is required *after* moving qp to RTR.\r
+       Arbitrarily reuse exch_dest for this purpose. */\r
+    if (exchange(data) != 0)\r
+        return -1;\r
+\r
+    if (send(data->sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
+        perror("send");\r
+        fprintf(stderr, "Couldn't send to socket\n");\r
+        return 1;\r
+    }\r
+\r
+    closesocket(data->sockfd);\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * Post one 1-byte receive at the start of ctx->buf, tagged with wr_id\r
+ * 0xdeadbeef. A failure is reported on stderr only.\r
+ */\r
+static void pp_post_recv(struct pingpong_context *ctx)\r
+{\r
+    struct ibv_sge sge;\r
+    struct ibv_recv_wr recv_wr;\r
+    struct ibv_recv_wr *bad_wr;\r
+    int rc;\r
+\r
+    sge.addr = (uintptr_t) ctx->buf;\r
+    sge.length = 1;\r
+    sge.lkey = ctx->mr->lkey;\r
+\r
+    recv_wr.next = NULL;\r
+    recv_wr.wr_id = 0xdeadbeef;\r
+    recv_wr.sg_list = &sge;\r
+    recv_wr.num_sge = 1;\r
+\r
+    rc = ibv_post_recv(ctx->qp, &recv_wr, &bad_wr);\r
+    if (rc == 0)\r
+        return;\r
+\r
+    perror("ibv_post_recv");\r
+    fprintf(stderr, "ibv_post_recv failed %d\n", rc);\r
+}\r
+\r
+/*\r
+ * Busy-poll the recv CQ until the peer's "done" message completes, then\r
+ * report any unexpected status, opcode or wr_id on stderr.\r
+ */\r
+static void pp_wait_for_done(struct pingpong_context *ctx)\r
+{\r
+    struct ibv_wc wc;\r
+    int ne;\r
+\r
+    do {\r
+        ne = ibv_poll_cq(ctx->rcq, 1, &wc);\r
+    } while (ne == 0);\r
+\r
+    /* A negative return also ends the loop, but leaves wc unset; do not\r
+     * inspect it in that case. */\r
+    if (ne < 0) {\r
+        fprintf(stderr, "poll CQ failed %d\n", ne);\r
+        return;\r
+    }\r
+\r
+    if (wc.status)\r
+        fprintf(stderr, "bad wc status %d\n", wc.status);\r
+    if (!(wc.opcode & IBV_WC_RECV))\r
+        fprintf(stderr, "bad wc opcode %d\n", wc.opcode);\r
+    if (wc.wr_id != 0xdeadbeef)\r
+        fprintf(stderr, "bad wc wr_id 0x%x\n", (int)wc.wr_id);\r
+}\r
+\r
+/*\r
+ * Send the 1-byte "done" message (wr_id 0xcafebabe) and busy-poll the send CQ\r
+ * for its completion. Reuses ctx->list and ctx->wr. Problems are reported on\r
+ * stderr only.\r
+ */\r
+static void pp_send_done(struct pingpong_context *ctx)\r
+{\r
+    struct ibv_send_wr *bad_wr;\r
+    struct ibv_wc wc;\r
+    int ne;\r
+\r
+    ctx->list.addr = (uintptr_t) ctx->buf;\r
+    ctx->list.length = 1;\r
+    ctx->list.lkey = ctx->mr->lkey;\r
+    ctx->wr.wr_id = 0xcafebabe;\r
+    ctx->wr.sg_list = &ctx->list;\r
+    ctx->wr.num_sge = 1;\r
+    ctx->wr.opcode = IBV_WR_SEND;\r
+    ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
+    ctx->wr.next = NULL;\r
+    if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {\r
+        fprintf(stderr, "ibv_post_send failed\n");\r
+        return;\r
+    }\r
+    do {\r
+        ne = ibv_poll_cq(ctx->scq, 1, &wc);\r
+    } while (ne == 0);\r
+\r
+    /* A negative return also ends the loop, but leaves wc unset; do not\r
+     * inspect it in that case. */\r
+    if (ne < 0) {\r
+        fprintf(stderr, "poll CQ failed %d\n", ne);\r
+        return;\r
+    }\r
+\r
+    if (wc.status)\r
+        fprintf(stderr, "bad wc status %d\n", wc.status);\r
+    if (wc.opcode != IBV_WC_SEND)\r
+        fprintf(stderr, "bad wc opcode %d\n", wc.opcode);\r
+    if (wc.wr_id != 0xcafebabe)\r
+        fprintf(stderr, "bad wc wr_id 0x%x\n", (int)wc.wr_id);\r
+}\r
+\r
+/*\r
+ * Busy-poll the recv CQ until the peer's "start" message completes, report\r
+ * any unexpected status, opcode or wr_id on stderr, then post a new receive\r
+ * for the next message.\r
+ */\r
+static void pp_wait_for_start(struct pingpong_context *ctx)\r
+{\r
+    struct ibv_wc wc;\r
+    int ne;\r
+\r
+    do {\r
+        ne = ibv_poll_cq(ctx->rcq, 1, &wc);\r
+    } while (ne == 0);\r
+\r
+    if (ne < 0) {\r
+        /* wc is unset after a failed poll, so skip the checks on it. */\r
+        fprintf(stderr, "poll CQ failed %d\n", ne);\r
+    } else {\r
+        if (wc.status)\r
+            fprintf(stderr, "bad wc status %d\n", wc.status);\r
+        if (!(wc.opcode & IBV_WC_RECV))\r
+            fprintf(stderr, "bad wc opcode %d\n", wc.opcode);\r
+        if (wc.wr_id != 0xdeadbeef)\r
+            fprintf(stderr, "bad wc wr_id 0x%x\n", (int)wc.wr_id);\r
+    }\r
+\r
+    /* Re-posted in every case, exactly as before the poll check was added. */\r
+    pp_post_recv(ctx);\r
+}\r
+\r
+/*\r
+ * Send the 1-byte "start" message (wr_id 0xabbaabba) and busy-poll the send\r
+ * CQ for its completion. Reuses ctx->list and ctx->wr. Problems are reported\r
+ * on stderr only.\r
+ */\r
+static void pp_send_start(struct pingpong_context *ctx)\r
+{\r
+    struct ibv_send_wr *bad_wr;\r
+    struct ibv_wc wc;\r
+    int ne;\r
+\r
+    ctx->list.addr = (uintptr_t) ctx->buf;\r
+    ctx->list.length = 1;\r
+    ctx->list.lkey = ctx->mr->lkey;\r
+    ctx->wr.wr_id = 0xabbaabba;\r
+    ctx->wr.sg_list = &ctx->list;\r
+    ctx->wr.num_sge = 1;\r
+    ctx->wr.opcode = IBV_WR_SEND;\r
+    ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
+    ctx->wr.next = NULL;\r
+    if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {\r
+        fprintf(stderr, "ibv_post_send failed\n");\r
+        return;\r
+    }\r
+    do {\r
+        ne = ibv_poll_cq(ctx->scq, 1, &wc);\r
+    } while (ne == 0);\r
+\r
+    /* A negative return also ends the loop, but leaves wc unset; do not\r
+     * inspect it in that case. */\r
+    if (ne < 0) {\r
+        fprintf(stderr, "poll CQ failed %d\n", ne);\r
+        return;\r
+    }\r
+\r
+    if (wc.status)\r
+        fprintf(stderr, "bad wc status %d\n", wc.status);\r
+    if (wc.opcode != IBV_WC_SEND)\r
+        fprintf(stderr, "bad wc opcode %d\n", wc.opcode);\r
+    if (wc.wr_id != 0xabbaabba)\r
+        fprintf(stderr, "bad wc wr_id 0x%x\n", (int)wc.wr_id);\r
+}\r
+\r
+static void pp_close_cma(struct pp_data data)\r
+{\r
+ struct rdma_cm_event *event;\r
+ int rc;\r
+\r
+ if (data.servername) {\r
+ rc = rdma_disconnect(data.cm_id);\r
+ if (rc) {\r
+ perror("rdma_disconnect");\r
+ fprintf(stderr, "rdma disconnect error\n");\r
+ return;\r
+ }\r
+ }\r
+\r
+ rdma_get_cm_event(data.cm_channel, &event);\r
+ if (event->event != RDMA_CM_EVENT_DISCONNECTED)\r
+ fprintf(stderr, "unexpected event during disconnect %d\n", \r
+ event->event);\r
+ rdma_ack_cm_event(event);\r
+ rdma_destroy_id(data.cm_id);\r
+ rdma_destroy_event_channel(data.cm_channel);\r
+}\r
+\r
+/*\r
+ * Print the command-line synopsis and the option list to stdout.\r
+ * argv0 is the program name shown in the synopsis lines.\r
+ */\r
+static void usage(const char *argv0)\r
+{\r
+    /* Option descriptions; none of them contains a conversion specifier. */\r
+    static const char *const option_help[] = {\r
+        " -p, --port=<port> listen on/connect to port <port> (default 18515)\n",\r
+        " -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n",\r
+        " -i, --ib-port=<port> use port <port> of IB device (default 1)\n",\r
+        " -s, --size=<size> size of message to exchange (default 1)\n",\r
+        " -t, --tx-depth=<dep> size of tx queue (default 50)\n",\r
+        " -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n",\r
+        " -I, --inline_size=<size> max size of message to be sent in inline mode (default 400)\n",\r
+        " -C, --report-cycles report times in cpu cycle units (default microseconds)\n",\r
+        " -H, --report-histogram print out all results (default print summary only)\n",\r
+        " -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n",\r
+        " -c, --cma Use the RDMA CMA to setup the RDMA connection\n"\r
+    };\r
+    size_t i;\r
+\r
+    printf("Usage:\n");\r
+    printf(" %s start a server and wait for connection\n", argv0);\r
+    printf(" %s -h <host> connect to server at <host>\n", argv0);\r
+    printf("\n");\r
+    printf("Options:\n");\r
+    for (i = 0; i < sizeof option_help / sizeof option_help[0]; ++i)\r
+        fputs(option_help[i], stdout);\r
+}\r
+\r
+/*\r
+ * Median of the n samples in delta[], which the caller has already sorted.\r
+ * For an odd n this is the middle sample; for an even n it is the mean of\r
+ * the two middle samples.\r
+ */\r
+static cycles_t get_median(int n, cycles_t delta[])\r
+{\r
+    int mid = n / 2;\r
+\r
+    if ((n - 1) % 2 == 0)\r
+        return delta[mid];\r
+\r
+    return (delta[mid] + delta[mid - 1]) / 2;\r
+}\r
+\r
+/*\r
+ * qsort() comparator that orders cycles_t values ascending.\r
+ * Returns -1, 0 or 1.\r
+ */\r
+static int __cdecl cycles_compare(const void * aptr, const void * bptr)\r
+{\r
+    cycles_t lhs = *(const cycles_t *) aptr;\r
+    cycles_t rhs = *(const cycles_t *) bptr;\r
+\r
+    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */\r
+    return (lhs > rhs) - (lhs < rhs);\r
+}\r
+\r
+/*\r
+ * Print the latency report.\r
+ *\r
+ * options: selects raw counter units vs. microseconds and the optional\r
+ *          unsorted / sorted per-sample listings.\r
+ * iters:   number of timestamps in tstamp[]; at least 2 are needed.\r
+ * tstamp:  get_cycles() values taken just before each post.\r
+ *\r
+ * Every printed figure is half the gap between consecutive timestamps.\r
+ */\r
+static void print_report(struct report_options * options,\r
+                         unsigned int iters, cycles_t *tstamp)\r
+{\r
+    double cycles_to_units;\r
+    cycles_t median;\r
+    unsigned int i;\r
+    const char* units;\r
+    cycles_t *delta;\r
+\r
+    /* With fewer than two timestamps there is no delta to report, and\r
+     * delta[iters - 2] below would be out of bounds. */\r
+    if (iters < 2) {\r
+        fprintf(stderr, "Need at least 2 iterations to report latency\n");\r
+        return;\r
+    }\r
+\r
+    delta = malloc((iters - 1) * sizeof *delta);\r
+    if (!delta) {\r
+        perror("malloc");\r
+        return;\r
+    }\r
+\r
+    for (i = 0; i < iters - 1; ++i)\r
+        delta[i] = tstamp[i + 1] - tstamp[i];\r
+\r
+    if (options->cycles) {\r
+        cycles_to_units = 1.0;\r
+        units = "cycles";\r
+    } else {\r
+        /* get_freq() is in counts per second; scale it to counts per\r
+         * microsecond so the figures match the "usec" label. */\r
+        cycles_to_units = (double) get_freq() / 1000000.0;\r
+        units = "usec";\r
+    }\r
+\r
+    if (options->unsorted) {\r
+        printf("#, %s\n", units);\r
+        for (i = 0; i < iters - 1; ++i)\r
+            printf("%u, %g\n", i + 1, (double) delta[i] / cycles_to_units / 2.0);\r
+    }\r
+\r
+    qsort(delta, iters - 1, sizeof *delta, cycles_compare);\r
+\r
+    if (options->histogram) {\r
+        printf("#, %s\n", units);\r
+        for (i = 0; i < iters - 1; ++i)\r
+            printf("%u, %g\n", i + 1, (double) delta[i] / cycles_to_units / 2.0);\r
+    }\r
+\r
+    median = get_median(iters - 1, delta);\r
+\r
+    printf("Latency typical: %g %s\n",\r
+           (double) median / cycles_to_units / 2.0, units);\r
+    printf("Latency best : %g %s\n",\r
+           (double) delta[0] / cycles_to_units / 2.0, units);\r
+    printf("Latency worst : %g %s\n",\r
+           (double) delta[iters - 2] / cycles_to_units / 2.0, units);\r
+\r
+    free(delta);\r
+}\r
+\r
+/*\r
+ * RDMA write latency ping-pong.\r
+ *\r
+ * Parses the options, connects to the peer (through rdma_cm with -c,\r
+ * otherwise through a TCP socket), then runs `iters` rounds in which each\r
+ * side waits for the peer's counter byte, stamps its own and RDMA-writes it.\r
+ * Finally prints the latency report.\r
+ *\r
+ * Returns 0 on success; each failure point has its own non-zero code.\r
+ */\r
+int __cdecl main(int argc, char *argv[])\r
+{\r
+    const char *ib_devname = NULL;\r
+    int iters = 1000;\r
+    struct report_options report;\r
+    struct pingpong_context *ctx;\r
+    struct ibv_qp *qp;\r
+    struct ibv_send_wr *wr;\r
+    volatile char *poll_buf;\r
+    volatile char *post_buf;\r
+    int scnt, rcnt, ccnt;\r
+    cycles_t *tstamp;\r
+    struct pp_data data;\r
+    WORD version;\r
+    WSADATA wsdata;\r
+    int err;\r
+\r
+    srand((unsigned int) time(NULL));\r
+    version = MAKEWORD(2, 2);\r
+    err = WSAStartup(version, &wsdata);\r
+    if (err)\r
+        return -1;\r
+\r
+    memset(&report, 0, sizeof report);\r
+    memset(&data, 0, sizeof data);\r
+    data.port = 18515;\r
+    data.ib_port = 1;\r
+    data.size = 1;\r
+    data.tx_depth = 50;\r
+    data.use_cma = 0;\r
+    data.servername = NULL;\r
+    data.rem_dest = NULL;\r
+    data.ib_dev = NULL;\r
+    data.cm_channel = NULL;\r
+    data.cm_id = NULL;\r
+\r
+    /* Parameter parsing. */\r
+    while (1) {\r
+        int c;\r
+\r
+        c = getopt(argc, argv, "h:p:d:i:s:n:t:I:CHUc");\r
+        if (c == -1)\r
+            break;\r
+\r
+        switch (c) {\r
+        case 'p':\r
+            data.port = strtol(optarg, NULL, 0);\r
+            if (data.port < 0 || data.port > 65535) {\r
+                usage(argv[0]);\r
+                return 1;\r
+            }\r
+            break;\r
+\r
+        case 'd':\r
+            ib_devname = _strdup(optarg);\r
+            break;\r
+\r
+        case 'i':\r
+            data.ib_port = strtol(optarg, NULL, 0);\r
+            if (data.ib_port < 0) {\r
+                usage(argv[0]);\r
+                return 2;\r
+            }\r
+            break;\r
+\r
+        case 's': {\r
+            /* Check the signed value first: data.size is unsigned, so a\r
+             * negative argument would otherwise pass the "< 1" test. */\r
+            long requested = strtol(optarg, NULL, 0);\r
+\r
+            if (requested < 1) { usage(argv[0]); return 3; }\r
+            data.size = (unsigned) requested;\r
+            break;\r
+        }\r
+\r
+        case 't':\r
+            data.tx_depth = strtol(optarg, NULL, 0);\r
+            if (data.tx_depth < 1) { usage(argv[0]); return 4; }\r
+            break;\r
+\r
+        case 'n':\r
+            iters = strtol(optarg, NULL, 0);\r
+            if (iters < 2) {\r
+                usage(argv[0]);\r
+                return 5;\r
+            }\r
+            break;\r
+\r
+        case 'I':\r
+            inline_size = strtol(optarg, NULL, 0);\r
+            break;\r
+\r
+        case 'C':\r
+            report.cycles = 1;\r
+            break;\r
+\r
+        case 'H':\r
+            report.histogram = 1;\r
+            break;\r
+\r
+        case 'U':\r
+            report.unsorted = 1;\r
+            break;\r
+\r
+        case 'c':\r
+            data.use_cma = 1;\r
+            break;\r
+\r
+        case 'h':\r
+            if (optarg) {\r
+                data.servername = _strdup(optarg);\r
+                break;\r
+            }\r
+            /* no host given: fall through to the usage message */\r
+\r
+        default:\r
+            usage(argv[0]);\r
+            return 5;\r
+        }\r
+    }\r
+\r
+    if (data.use_cma) {\r
+        data.cm_channel = rdma_create_event_channel();\r
+        if (!data.cm_channel) {\r
+            fprintf(stderr, "rdma_create_event_channel failed\n");\r
+            return 1;\r
+        }\r
+        if (rdma_create_id(data.cm_channel, &data.cm_id, NULL, RDMA_PS_TCP)) {\r
+            fprintf(stderr, "rdma_create_id failed\n");\r
+            return 1;\r
+        }\r
+\r
+        if (data.servername) {\r
+            ctx = pp_client_connect(&data);\r
+            if (!ctx)\r
+                return 1;\r
+        } else {\r
+            ctx = pp_server_connect(&data);\r
+            if (!ctx)\r
+                return 1;\r
+        }\r
+\r
+        printf("Local address: LID %#04x, QPN %#06x, PSN %#06x "\r
+               "RKey %#08x VAddr %#016Lx\n",\r
+               data.my_dest.lid, data.my_dest.qpn, data.my_dest.psn,\r
+               data.my_dest.rkey, data.my_dest.vaddr);\r
+\r
+        printf("Remote address: LID %#04x, QPN %#06x, PSN %#06x, "\r
+               "RKey %#08x VAddr %#016Lx\n\n",\r
+               data.rem_dest->lid, data.rem_dest->qpn, data.rem_dest->psn,\r
+               data.rem_dest->rkey, data.rem_dest->vaddr);\r
+\r
+        /* The client sends "start" and the server waits for it. */\r
+        if (data.servername) {\r
+            pp_send_start(ctx);\r
+        } else {\r
+            pp_wait_for_start(ctx);\r
+        }\r
+\r
+    } else {\r
+        data.ib_dev = pp_find_dev(ib_devname);\r
+        if (!data.ib_dev)\r
+            return 7;\r
+\r
+        if (data.servername) {\r
+            ctx = pp_client_connect(&data);\r
+            if (!ctx)\r
+                return 8;\r
+        } else {\r
+            ctx = pp_server_connect(&data);\r
+            if (!ctx)\r
+                return 8;\r
+        }\r
+\r
+        if (pp_open_port(ctx, &data))\r
+            return 9;\r
+    }\r
+\r
+    /* Build the RDMA write that targets the address the peer advertised. */\r
+    wr = &ctx->wr;\r
+    ctx->list.addr = (uintptr_t) ctx->buf;\r
+    ctx->list.length = ctx->size;\r
+    ctx->list.lkey = ctx->mr->lkey;\r
+    wr->wr.rdma.remote_addr = data.rem_dest->vaddr;\r
+    wr->wr.rdma.rkey = data.rem_dest->rkey;\r
+    ctx->wr.wr_id = PINGPONG_RDMA_WRID;\r
+    ctx->wr.sg_list = &ctx->list;\r
+    ctx->wr.num_sge = 1;\r
+    ctx->wr.opcode = IBV_WR_RDMA_WRITE;\r
+    if (ctx->size > inline_size || ctx->size == 0) {\r
+        ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
+    } else {\r
+        ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;\r
+    }\r
+    ctx->wr.next = NULL;\r
+\r
+    scnt = 0;\r
+    rcnt = 0;\r
+    ccnt = 0;\r
+    poll_buf = ctx->poll_buf;\r
+    post_buf = ctx->post_buf;\r
+    qp = ctx->qp;\r
+\r
+    tstamp = malloc(iters * sizeof *tstamp);\r
+    if (!tstamp) {\r
+        perror("malloc");\r
+        return 10;\r
+    }\r
+\r
+    /* Done with setup. Start the test. */\r
+\r
+    while (scnt < iters || ccnt < iters || rcnt < iters) {\r
+\r
+        /* Wait till buffer changes. The client skips this on its first\r
+         * pass because it is the side that sends first. */\r
+        if (rcnt < iters && !(scnt < 1 && data.servername)) {\r
+            ++rcnt;\r
+            while (*poll_buf != (char)rcnt)\r
+                ;\r
+            /* Here the data is already in the physical memory.\r
+               If we wanted to actually use it, we may need\r
+               a read memory barrier here. */\r
+        }\r
+\r
+        if (scnt < iters) {\r
+            struct ibv_send_wr *bad_wr;\r
+            tstamp[scnt] = get_cycles();\r
+\r
+            *post_buf = (char)++scnt;\r
+            if (ibv_post_send(qp, wr, &bad_wr)) {\r
+                fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
+                        scnt);\r
+                return 11;\r
+            }\r
+        }\r
+\r
+        if (ccnt < iters) {\r
+            struct ibv_wc wc;\r
+            int ne;\r
+            ++ccnt;\r
+            do {\r
+                ne = ibv_poll_cq(ctx->scq, 1, &wc);\r
+            } while (ne == 0);\r
+\r
+            if (ne < 0) {\r
+                fprintf(stderr, "poll CQ failed %d\n", ne);\r
+                return 12;\r
+            }\r
+            if (wc.status != IBV_WC_SUCCESS) {\r
+                /* The role comes from data.servername; the former local\r
+                 * `servername` was never assigned, so this always said\r
+                 * "server". */\r
+                fprintf(stderr, "Completion wth error at %s:\n",\r
+                        data.servername ? "client" : "server");\r
+                fprintf(stderr, "Failed status %d: wr_id %d\n",\r
+                        wc.status, (int) wc.wr_id);\r
+                fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",\r
+                        scnt, rcnt, ccnt);\r
+                return 13;\r
+            }\r
+        }\r
+    }\r
+    if (data.use_cma) {\r
+        pp_send_done(ctx);\r
+        pp_wait_for_done(ctx);\r
+        pp_close_cma(data);\r
+    }\r
+\r
+    print_report(&report, iters, tstamp);\r
+    free(tstamp);\r
+    return 0;\r
+}\r