+/*\r
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.\r
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.\r
+ * Copyright (c) 2009 Intel Corp. All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ * Redistribution and use in source and binary forms, with or\r
+ * without modification, are permitted provided that the following\r
+ * conditions are met:\r
+ *\r
+ * - Redistributions of source code must retain the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer.\r
+ *\r
+ * - Redistributions in binary form must reproduce the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer in the documentation and/or other materials\r
+ * provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ */\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <ws2tcpip.h>\r
+#include <winsock2.h>\r
+#include <time.h>\r
+\r
+#include "..\..\..\..\etc\user\getopt.c"\r
+#include <rdma/rdma_cma.h>\r
+\r
+#include <rdma/rdma_cma.h>\r
+\r
+static int debug = 0;\r
+#define DEBUG_LOG if (debug) printf\r
+\r
+/*\r
+ * rping "ping/pong" loop:\r
+ * client sends source rkey/addr/len\r
+ * server receives source rkey/addr/len\r
+ * server rdma reads "ping" data from source\r
+ * server sends "go ahead" on rdma read completion\r
+ * client sends sink rkey/addr/len\r
+ * server receives sink rkey/addr/len\r
+ * server rdma writes "pong" data to sink\r
+ * server sends "go ahead" on rdma write completion\r
+ * <repeat loop>\r
+ */\r
+\r
+/*\r
+ * These states record the most recent event seen by the connection-manager\r
+ * and completion handlers, so the client or server code can check what\r
+ * happened after pumping an event.\r
+ *\r
+ * Once connected, each ping cycles through RDMA_READ_ADV,\r
+ * RDMA_READ_COMPLETE (set on an RDMA read completion), RDMA_WRITE_ADV\r
+ * and RDMA_WRITE_COMPLETE.\r
+ */\r
+enum test_state {\r
+ IDLE = 1, /* initial state assigned in main() */\r
+ CONNECT_REQUEST, /* CM delivered RDMA_CM_EVENT_CONNECT_REQUEST */\r
+ ADDR_RESOLVED, /* CM delivered RDMA_CM_EVENT_ADDR_RESOLVED */\r
+ ROUTE_RESOLVED, /* CM delivered RDMA_CM_EVENT_ROUTE_RESOLVED */\r
+ CONNECTED, /* set on RDMA_CM_EVENT_ESTABLISHED, client side only */\r
+ RDMA_READ_ADV, /* server: source advertisement received; client: start of a ping */\r
+ RDMA_READ_COMPLETE, /* an RDMA read work request completed */\r
+ RDMA_WRITE_ADV, /* server: sink advertisement received; client: first go-ahead received */\r
+ RDMA_WRITE_COMPLETE, /* RDMA write completed (server) or second go-ahead received (client) */\r
+ RDMA_ERROR /* a handler detected a failure */\r
+};\r
+\r
+struct rping_rdma_info { /* buffer advertisement carried in SEND/RECV messages; fields are in network byte order */\r
+ uint64_t buf; /* address of the advertised buffer */\r
+ uint32_t rkey; /* remote key of the memory region covering buf */\r
+ uint32_t size; /* length of the advertised buffer in bytes */\r
+};\r
+\r
+#define RPING_SQ_DEPTH 16 /* send queue depth; the CQ is created with twice this many entries */\r
+\r
+#define RPING_MSG_FMT "rdma-ping-%d: " /* prefix written at the start of each ping buffer; %d is the ping number */\r
+#define RPING_MIN_BUFSIZE 16 /* smallest ping data size accepted by the -S option */\r
+\r
+/*\r
+ * Control block struct.\r
+ *\r
+ * Holds everything one rping endpoint needs: the verbs objects, the\r
+ * pre-built work requests with their buffers and memory regions, the\r
+ * peer's most recent advertisement, the command-line settings and the\r
+ * connection-manager handles.\r
+ */\r
+struct rping_cb {\r
+ int server; /* 0 iff client */\r
+ pthread_t cqthread; /* NOTE(review): only referenced by commented-out pthread_create calls in this file */\r
+ struct ibv_comp_channel *channel;\r
+ struct ibv_cq *cq;\r
+ struct ibv_pd *pd;\r
+ struct ibv_qp *qp;\r
+\r
+ struct ibv_recv_wr rq_wr; /* recv work request record */\r
+ struct ibv_sge recv_sgl; /* recv single SGE */\r
+ struct rping_rdma_info recv_buf;/* receive buffer, embedded in this struct */\r
+ struct ibv_mr *recv_mr; /* MR associated with this buffer */\r
+\r
+ struct ibv_send_wr sq_wr; /* send work request record */\r
+ struct ibv_sge send_sgl;\r
+ struct rping_rdma_info send_buf;/* single send buf */\r
+ struct ibv_mr *send_mr;\r
+\r
+ struct ibv_send_wr rdma_sq_wr; /* rdma work request record */\r
+ struct ibv_sge rdma_sgl; /* rdma single SGE */\r
+ char *rdma_buf; /* used as rdma sink */\r
+ struct ibv_mr *rdma_mr;\r
+\r
+ uint32_t remote_rkey; /* peer's rkey, taken from its advertisement */\r
+ uint64_t remote_addr; /* peer's buffer address, taken from its advertisement */\r
+ uint32_t remote_len; /* peer's buffer length, taken from its advertisement */\r
+\r
+ char *start_buf; /* rdma read src */\r
+ struct ibv_mr *start_mr;\r
+\r
+ enum test_state state; /* used for cond/signalling */\r
+// sem_t sem;\r
+\r
+ struct sockaddr_in sin;\r
+ uint16_t port; /* dst port in NBO */\r
+ int verbose; /* verbose logging */\r
+ int count; /* ping count */\r
+ int size; /* ping data size */\r
+ int validate; /* validate ping data */\r
+\r
+ /* CM stuff */\r
+// pthread_t cmthread;\r
+ struct rdma_event_channel *cm_channel;\r
+ struct rdma_cm_id *cm_id; /* connection on client side,*/\r
+ /* listener on server side. */\r
+ struct rdma_cm_id *child_cm_id; /* connection on server side */\r
+};\r
+\r
+/* Control block of this process; allocated and released by main(). */\r
+struct rping_cb *cb;\r
+\r
+/*\r
+ * Event pumps.  Each call handles one event and returns nothing; the\r
+ * outcome is reported through the control block's state.  The return type\r
+ * here must match the definitions further down, which are declared void.\r
+ */\r
+static void cm_thread(void *arg);\r
+static void cq_thread(void *arg);\r
+\r
+static int rping_cma_event_handler(struct rdma_cm_id *cma_id,\r
+ struct rdma_cm_event *event)\r
+{\r
+ int ret = 0;\r
+ struct rping_cb *cb = cma_id->context;\r
+\r
+ DEBUG_LOG("cma_event type %s cma_id %p (%s)\n",\r
+ rdma_event_str(event->event), cma_id,\r
+ (cma_id == cb->cm_id) ? "parent" : "child");\r
+\r
+ switch (event->event) {\r
+ case RDMA_CM_EVENT_ADDR_RESOLVED:\r
+ cb->state = ADDR_RESOLVED;\r
+ ret = rdma_resolve_route(cma_id, 2000);\r
+ if (ret) {\r
+ cb->state = RDMA_ERROR;\r
+ fprintf(stderr, "rdma_resolve_route error %d\n", ret);\r
+// sem_post(&cb->sem);\r
+ }\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:\r
+ cb->state = ROUTE_RESOLVED;\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_CONNECT_REQUEST:\r
+ cb->state = CONNECT_REQUEST;\r
+ cb->child_cm_id = cma_id;\r
+ DEBUG_LOG("child cma %p\n", cb->child_cm_id);\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_ESTABLISHED:\r
+ DEBUG_LOG("ESTABLISHED\n");\r
+\r
+ /*\r
+ * Server will wake up when first RECV completes.\r
+ */\r
+ if (!cb->server) {\r
+ cb->state = CONNECTED;\r
+ }\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_ADDR_ERROR:\r
+ case RDMA_CM_EVENT_ROUTE_ERROR:\r
+ case RDMA_CM_EVENT_CONNECT_ERROR:\r
+ case RDMA_CM_EVENT_UNREACHABLE:\r
+ case RDMA_CM_EVENT_REJECTED:\r
+ fprintf(stderr, "cma event %s, error %d\n",\r
+ rdma_event_str(event->event), event->status);\r
+// sem_post(&cb->sem);\r
+ ret = -1;\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_DISCONNECTED:\r
+ fprintf(stderr, "%s DISCONNECT EVENT...\n",\r
+ cb->server ? "server" : "client");\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:\r
+ fprintf(stderr, "cma detected device removal!!!!\n");\r
+ ret = -1;\r
+ break;\r
+\r
+ default:\r
+ fprintf(stderr, "unhandled event: %s, ignoring\n",\r
+ rdma_event_str(event->event));\r
+ break;\r
+ }\r
+\r
+ return ret;\r
+}\r
+\r
+/*\r
+ * Server-side handling of a completed RECV: decode the peer's buffer\r
+ * advertisement into the control block and advance the state.\r
+ *\r
+ * Returns 0 on success, -1 if the message does not have the size of an\r
+ * advertisement.\r
+ */\r
+static int server_recv(struct rping_cb *cb, struct ibv_wc *wc)\r
+{\r
+	const struct rping_rdma_info *adv = &cb->recv_buf;\r
+\r
+	if (wc->byte_len == sizeof(cb->recv_buf)) {\r
+		/* The advertisement travels in network byte order. */\r
+		cb->remote_rkey = ntohl(adv->rkey);\r
+		cb->remote_addr = ntohll(adv->buf);\r
+		cb->remote_len = ntohl(adv->size);\r
+		DEBUG_LOG("Received rkey %x addr %" PRIx64 " len %d from peer\n",\r
+			  cb->remote_rkey, cb->remote_addr, cb->remote_len);\r
+\r
+		/* First message of a ping advertises the read source, the\r
+		 * second one the write sink. */\r
+		cb->state = (cb->state <= CONNECTED ||\r
+			     cb->state == RDMA_WRITE_COMPLETE) ?\r
+			    RDMA_READ_ADV : RDMA_WRITE_ADV;\r
+		return 0;\r
+	}\r
+\r
+	fprintf(stderr, "Received bogus data, size %d\n", wc->byte_len);\r
+	return -1;\r
+}\r
+\r
+/*\r
+ * Client-side handling of a completed RECV: each message from the server\r
+ * is a "go ahead" that moves the client to its next state.\r
+ *\r
+ * Returns 0 on success, -1 if the message has an unexpected size.\r
+ */\r
+static int client_recv(struct rping_cb *cb, struct ibv_wc *wc)\r
+{\r
+	if (wc->byte_len == sizeof(cb->recv_buf)) {\r
+		cb->state = (cb->state == RDMA_READ_ADV) ?\r
+			    RDMA_WRITE_ADV : RDMA_WRITE_COMPLETE;\r
+		return 0;\r
+	}\r
+\r
+	fprintf(stderr, "Received bogus data, size %d\n", wc->byte_len);\r
+	return -1;\r
+}\r
+\r
+static int rping_cq_event_handler(struct rping_cb *cb)\r
+{\r
+ struct ibv_wc wc;\r
+ struct ibv_recv_wr *bad_wr;\r
+ int ret;\r
+\r
+ while ((ret = ibv_poll_cq(cb->cq, 1, &wc)) == 1) {\r
+ ret = 0;\r
+\r
+ if (wc.status) {\r
+ fprintf(stderr, "cq completion failed status %d\n",\r
+ wc.status);\r
+ if (wc.status != IBV_WC_WR_FLUSH_ERR)\r
+ ret = -1;\r
+ goto error;\r
+ }\r
+\r
+ switch (wc.opcode) {\r
+ case IBV_WC_SEND:\r
+ DEBUG_LOG("send completion\n");\r
+ break;\r
+\r
+ case IBV_WC_RDMA_WRITE:\r
+ DEBUG_LOG("rdma write completion\n");\r
+ cb->state = RDMA_WRITE_COMPLETE;\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ case IBV_WC_RDMA_READ:\r
+ DEBUG_LOG("rdma read completion\n");\r
+ cb->state = RDMA_READ_COMPLETE;\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ case IBV_WC_RECV:\r
+ DEBUG_LOG("recv completion\n");\r
+ ret = cb->server ? server_recv(cb, &wc) :\r
+ client_recv(cb, &wc);\r
+ if (ret) {\r
+ fprintf(stderr, "recv wc error: %d\n", ret);\r
+ goto error;\r
+ }\r
+\r
+ ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);\r
+ if (ret) {\r
+ fprintf(stderr, "post recv error: %d\n", ret);\r
+ goto error;\r
+ }\r
+// sem_post(&cb->sem);\r
+ break;\r
+\r
+ default:\r
+ DEBUG_LOG("unknown!!!!! completion\n");\r
+ ret = -1;\r
+ goto error;\r
+ }\r
+ }\r
+ if (ret) {\r
+ fprintf(stderr, "poll error %d\n", ret);\r
+ goto error;\r
+ }\r
+ return 0;\r
+\r
+error:\r
+ cb->state = RDMA_ERROR;\r
+// sem_post(&cb->sem);\r
+ return ret;\r
+}\r
+\r
+static int rping_accept(struct rping_cb *cb)\r
+{\r
+ struct rdma_conn_param conn_param;\r
+ int ret;\r
+\r
+ DEBUG_LOG("accepting client connection request\n");\r
+\r
+ memset(&conn_param, 0, sizeof conn_param);\r
+ conn_param.responder_resources = 1;\r
+ conn_param.initiator_depth = 1;\r
+\r
+ ret = rdma_accept(cb->child_cm_id, &conn_param);\r
+ if (ret) {\r
+ fprintf(stderr, "rdma_accept error: %d\n", ret);\r
+ return ret;\r
+ }\r
+\r
+ cm_thread(cb);\r
+// sem_wait(&cb->sem);\r
+ if (cb->state == RDMA_ERROR) {\r
+ fprintf(stderr, "wait for CONNECTED state %d\n", cb->state);\r
+ return -1;\r
+ }\r
+ return 0;\r
+}\r
+\r
+/*\r
+ * Fill in the three work requests kept in the control block: the receive\r
+ * request, the SEND request that carries advertisements and go-aheads, and\r
+ * the RDMA request whose opcode and length are set per operation.\r
+ *\r
+ * The memory regions recv_mr, send_mr and rdma_mr must already be\r
+ * registered, because their lkeys are copied into the SGEs here.\r
+ */\r
+static void rping_setup_wr(struct rping_cb *cb)\r
+{\r
+	/*\r
+	 * Addresses go through uintptr_t: "unsigned long" is only 32 bits\r
+	 * wide on 64-bit Windows and would truncate the pointer.\r
+	 */\r
+	cb->recv_sgl.addr = (uint64_t) (uintptr_t) &cb->recv_buf;\r
+	cb->recv_sgl.length = sizeof cb->recv_buf;\r
+	cb->recv_sgl.lkey = cb->recv_mr->lkey;\r
+	cb->rq_wr.sg_list = &cb->recv_sgl;\r
+	cb->rq_wr.num_sge = 1;\r
+\r
+	cb->send_sgl.addr = (uint64_t) (uintptr_t) &cb->send_buf;\r
+	cb->send_sgl.length = sizeof cb->send_buf;\r
+	cb->send_sgl.lkey = cb->send_mr->lkey;\r
+\r
+	cb->sq_wr.opcode = IBV_WR_SEND;\r
+	cb->sq_wr.send_flags = IBV_SEND_SIGNALED;\r
+	cb->sq_wr.sg_list = &cb->send_sgl;\r
+	cb->sq_wr.num_sge = 1;\r
+\r
+	/* Opcode, remote address/rkey and length are set by the caller. */\r
+	cb->rdma_sgl.addr = (uint64_t) (uintptr_t) cb->rdma_buf;\r
+	cb->rdma_sgl.lkey = cb->rdma_mr->lkey;\r
+	cb->rdma_sq_wr.send_flags = IBV_SEND_SIGNALED;\r
+	cb->rdma_sq_wr.sg_list = &cb->rdma_sgl;\r
+	cb->rdma_sq_wr.num_sge = 1;\r
+}\r
+\r
+/*\r
+ * Register the embedded send/receive buffers, allocate and register the\r
+ * RDMA buffer and, on the client only, the start buffer; then build the\r
+ * work requests.\r
+ *\r
+ * Returns 0 on success.  On failure everything acquired so far is released\r
+ * and errno (registration failure) or -ENOMEM (allocation failure) is\r
+ * returned.\r
+ */\r
+static int rping_setup_buffers(struct rping_cb *cb)\r
+{\r
+	const int rdma_access = IBV_ACCESS_LOCAL_WRITE |\r
+				IBV_ACCESS_REMOTE_READ |\r
+				IBV_ACCESS_REMOTE_WRITE;\r
+	int rc;\r
+\r
+	DEBUG_LOG("rping_setup_buffers called on cb %p\n", cb);\r
+\r
+	cb->recv_mr = ibv_reg_mr(cb->pd, &cb->recv_buf, sizeof cb->recv_buf,\r
+				 IBV_ACCESS_LOCAL_WRITE);\r
+	if (cb->recv_mr == NULL) {\r
+		fprintf(stderr, "recv_buf reg_mr failed\n");\r
+		return errno;\r
+	}\r
+\r
+	cb->send_mr = ibv_reg_mr(cb->pd, &cb->send_buf, sizeof cb->send_buf, 0);\r
+	if (cb->send_mr == NULL) {\r
+		fprintf(stderr, "send_buf reg_mr failed\n");\r
+		rc = errno;\r
+		goto dereg_recv;\r
+	}\r
+\r
+	cb->rdma_buf = malloc(cb->size);\r
+	if (cb->rdma_buf == NULL) {\r
+		fprintf(stderr, "rdma_buf malloc failed\n");\r
+		rc = -ENOMEM;\r
+		goto dereg_send;\r
+	}\r
+\r
+	cb->rdma_mr = ibv_reg_mr(cb->pd, cb->rdma_buf, cb->size, rdma_access);\r
+	if (cb->rdma_mr == NULL) {\r
+		fprintf(stderr, "rdma_buf reg_mr failed\n");\r
+		rc = errno;\r
+		goto free_rdma_buf;\r
+	}\r
+\r
+	/* Only the client owns a start buffer. */\r
+	if (cb->server == 0) {\r
+		cb->start_buf = malloc(cb->size);\r
+		if (cb->start_buf == NULL) {\r
+			fprintf(stderr, "start_buf malloc failed\n");\r
+			rc = -ENOMEM;\r
+			goto dereg_rdma;\r
+		}\r
+\r
+		cb->start_mr = ibv_reg_mr(cb->pd, cb->start_buf, cb->size,\r
+					  rdma_access);\r
+		if (cb->start_mr == NULL) {\r
+			fprintf(stderr, "start_buf reg_mr failed\n");\r
+			rc = errno;\r
+			goto free_start_buf;\r
+		}\r
+	}\r
+\r
+	rping_setup_wr(cb);\r
+	DEBUG_LOG("allocated & registered buffers...\n");\r
+	return 0;\r
+\r
+	/* Unwind in reverse order of acquisition. */\r
+free_start_buf:\r
+	free(cb->start_buf);\r
+dereg_rdma:\r
+	ibv_dereg_mr(cb->rdma_mr);\r
+free_rdma_buf:\r
+	free(cb->rdma_buf);\r
+dereg_send:\r
+	ibv_dereg_mr(cb->send_mr);\r
+dereg_recv:\r
+	ibv_dereg_mr(cb->recv_mr);\r
+	return rc;\r
+}\r
+\r
+/*\r
+ * Release what rping_setup_buffers() acquired: the memory regions, the\r
+ * RDMA buffer and, on the client, the start buffer.\r
+ */\r
+static void rping_free_buffers(struct rping_cb *cb)\r
+{\r
+	DEBUG_LOG("rping_free_buffers called on cb %p\n", cb);\r
+\r
+	ibv_dereg_mr(cb->recv_mr);\r
+	ibv_dereg_mr(cb->send_mr);\r
+	ibv_dereg_mr(cb->rdma_mr);\r
+	free(cb->rdma_buf);\r
+\r
+	/* The server never allocated a start buffer. */\r
+	if (cb->server)\r
+		return;\r
+\r
+	ibv_dereg_mr(cb->start_mr);\r
+	free(cb->start_buf);\r
+}\r
+\r
+/*\r
+ * Create a reliable-connected queue pair on the connection's cm id (the\r
+ * child id on the server, cm_id on the client) and remember it in cb->qp.\r
+ *\r
+ * Returns the rdma_create_qp() result.\r
+ */\r
+static int rping_create_qp(struct rping_cb *cb)\r
+{\r
+	struct rdma_cm_id *id = cb->server ? cb->child_cm_id : cb->cm_id;\r
+	struct ibv_qp_init_attr attr;\r
+	int rc;\r
+\r
+	memset(&attr, 0, sizeof(attr));\r
+	attr.qp_type = IBV_QPT_RC;\r
+	attr.send_cq = cb->cq;\r
+	attr.recv_cq = cb->cq;\r
+	attr.cap.max_send_wr = RPING_SQ_DEPTH;\r
+	attr.cap.max_recv_wr = 2;\r
+	attr.cap.max_send_sge = 1;\r
+	attr.cap.max_recv_sge = 1;\r
+\r
+	rc = rdma_create_qp(id, cb->pd, &attr);\r
+	if (rc == 0)\r
+		cb->qp = id->qp;\r
+\r
+	return rc;\r
+}\r
+\r
+static void rping_free_qp(struct rping_cb *cb)\r
+{ /* Destroys the verbs objects in the reverse of the order rping_setup_qp() creates them. */\r
+ ibv_destroy_qp(cb->qp); /* queue pair first: it references the CQ and PD */\r
+ ibv_destroy_cq(cb->cq); /* the CQ was created on the completion channel */\r
+ ibv_destroy_comp_channel(cb->channel);\r
+ ibv_dealloc_pd(cb->pd);\r
+}\r
+\r
+/*\r
+ * Create the protection domain, completion channel, completion queue and\r
+ * queue pair for the connection identified by cm_id, and arm the CQ for\r
+ * its first notification.\r
+ *\r
+ * Returns 0 on success.  On failure the objects created so far are\r
+ * destroyed and a non-zero error code is returned.\r
+ */\r
+static int rping_setup_qp(struct rping_cb *cb, struct rdma_cm_id *cm_id)\r
+{\r
+	int ret;\r
+\r
+	cb->pd = ibv_alloc_pd(cm_id->verbs);\r
+	if (!cb->pd) {\r
+		fprintf(stderr, "ibv_alloc_pd failed\n");\r
+		return errno;\r
+	}\r
+	DEBUG_LOG("created pd %p\n", cb->pd);\r
+\r
+	cb->channel = ibv_create_comp_channel(cm_id->verbs);\r
+	if (!cb->channel) {\r
+		fprintf(stderr, "ibv_create_comp_channel failed\n");\r
+		ret = errno;\r
+		goto err1;\r
+	}\r
+	DEBUG_LOG("created channel %p\n", cb->channel);\r
+\r
+	cb->cq = ibv_create_cq(cm_id->verbs, RPING_SQ_DEPTH * 2, cb,\r
+			       cb->channel, 0);\r
+	if (!cb->cq) {\r
+		fprintf(stderr, "ibv_create_cq failed\n");\r
+		ret = errno;\r
+		goto err2;\r
+	}\r
+	DEBUG_LOG("created cq %p\n", cb->cq);\r
+\r
+	/*\r
+	 * Report the call that actually failed and keep its own return\r
+	 * value: replacing it with errno could turn the failure into 0.\r
+	 */\r
+	ret = ibv_req_notify_cq(cb->cq, 0);\r
+	if (ret) {\r
+		fprintf(stderr, "ibv_req_notify_cq failed\n");\r
+		goto err3;\r
+	}\r
+\r
+	ret = rping_create_qp(cb);\r
+	if (ret) {\r
+		fprintf(stderr, "rping_create_qp failed: %d\n", ret);\r
+		goto err3;\r
+	}\r
+	DEBUG_LOG("created qp %p\n", cb->qp);\r
+	return 0;\r
+\r
+err3:\r
+	ibv_destroy_cq(cb->cq);\r
+err2:\r
+	ibv_destroy_comp_channel(cb->channel);\r
+err1:\r
+	ibv_dealloc_pd(cb->pd);\r
+	return ret;\r
+}\r
+\r
+/*\r
+ * Wait for one connection-manager event on cb->cm_channel, hand it to\r
+ * rping_cma_event_handler() and acknowledge it.\r
+ *\r
+ * arg is the struct rping_cb to operate on.  Nothing is returned; callers\r
+ * inspect cb->state afterwards.  A failure to fetch the event, or an error\r
+ * reported by the handler, is recorded as RDMA_ERROR so that it cannot be\r
+ * mistaken for success.\r
+ */\r
+static void cm_thread(void *arg)\r
+{\r
+	struct rping_cb *cb = arg;\r
+	struct rdma_cm_event *event;\r
+	int ret;\r
+\r
+	ret = rdma_get_cm_event(cb->cm_channel, &event);\r
+	if (ret) {\r
+		fprintf(stderr, "rdma_get_cm_event err %d\n", ret);\r
+		cb->state = RDMA_ERROR;\r
+		return;\r
+	}\r
+\r
+	ret = rping_cma_event_handler(event->id, event);\r
+	rdma_ack_cm_event(event);\r
+	if (ret)\r
+		cb->state = RDMA_ERROR;\r
+}\r
+\r
+/*\r
+ * Wait for one completion-channel event, re-arm the CQ, drain it through\r
+ * rping_cq_event_handler() and acknowledge the event.\r
+ *\r
+ * arg is the struct rping_cb to operate on.  Nothing is returned; callers\r
+ * inspect cb->state afterwards.  Every failure is recorded as RDMA_ERROR,\r
+ * and an event that was obtained is acknowledged on every path, because\r
+ * unacknowledged events keep the CQ from being destroyed.\r
+ */\r
+static void cq_thread(void *arg)\r
+{\r
+	struct rping_cb *cb = arg;\r
+	struct ibv_cq *ev_cq;\r
+	void *ev_ctx;\r
+	int ret;\r
+\r
+	ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);\r
+	if (ret) {\r
+		fprintf(stderr, "Failed to get cq event!\n");\r
+		cb->state = RDMA_ERROR;\r
+		return;\r
+	}\r
+	if (ev_cq != cb->cq) {\r
+		fprintf(stderr, "Unknown CQ!\n");\r
+		ibv_ack_cq_events(ev_cq, 1);\r
+		cb->state = RDMA_ERROR;\r
+		return;\r
+	}\r
+	ret = ibv_req_notify_cq(cb->cq, 0);\r
+	if (ret) {\r
+		fprintf(stderr, "Failed to set notify!\n");\r
+		ibv_ack_cq_events(cb->cq, 1);\r
+		cb->state = RDMA_ERROR;\r
+		return;\r
+	}\r
+\r
+	/* The handler records its own failures in cb->state. */\r
+	(void) rping_cq_event_handler(cb);\r
+	ibv_ack_cq_events(cb->cq, 1);\r
+}\r
+\r
+/*\r
+ * Write an advertisement for the buffer buf, registered as mr, into the\r
+ * control block's send buffer in network byte order.  The advertised\r
+ * length is always cb->size.\r
+ */\r
+static void rping_format_send(struct rping_cb *cb, char *buf, struct ibv_mr *mr)\r
+{\r
+	struct rping_rdma_info *info = &cb->send_buf;\r
+\r
+	/*\r
+	 * Convert through uintptr_t: "unsigned long" is only 32 bits wide on\r
+	 * 64-bit Windows and would advertise a truncated address.\r
+	 */\r
+	info->buf = htonll((uint64_t) (uintptr_t) buf);\r
+	info->rkey = htonl(mr->rkey);\r
+	info->size = htonl(cb->size);\r
+\r
+	DEBUG_LOG("RDMA addr %" PRIx64" rkey %x len %d\n",\r
+		  ntohll(info->buf), ntohl(info->rkey), ntohl(info->size));\r
+}\r
+\r
+static int rping_test_server(struct rping_cb *cb)\r
+{\r
+ struct ibv_send_wr *bad_wr;\r
+ int ret;\r
+\r
+ while (1) {\r
+ /* Wait for client's Start STAG/TO/Len */\r
+ cq_thread(cb);\r
+// sem_wait(&cb->sem);\r
+ if (cb->state != RDMA_READ_ADV) {\r
+ fprintf(stderr, "wait for RDMA_READ_ADV state %d\n",\r
+ cb->state);\r
+ ret = -1;\r
+ break;\r
+ }\r
+\r
+ DEBUG_LOG("server received sink adv\n");\r
+\r
+ /* Issue RDMA Read. */\r
+ cb->rdma_sq_wr.opcode = IBV_WR_RDMA_READ;\r
+ cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;\r
+ cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;\r
+ cb->rdma_sq_wr.sg_list->length = cb->remote_len;\r
+\r
+ ret = ibv_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);\r
+ if (ret) {\r
+ fprintf(stderr, "post send error %d\n", ret);\r
+ break;\r
+ }\r
+ DEBUG_LOG("server posted rdma read req \n");\r
+\r
+ /* Wait for read completion */\r
+ cq_thread(cb);\r
+// sem_wait(&cb->sem);\r
+ if (cb->state != RDMA_READ_COMPLETE) {\r
+ fprintf(stderr, "wait for RDMA_READ_COMPLETE state %d\n",\r
+ cb->state);\r
+ ret = -1;\r
+ break;\r
+ }\r
+ DEBUG_LOG("server received read complete\n");\r
+\r
+ /* Display data in recv buf */\r
+ if (cb->verbose)\r
+ printf("server ping data: %s\n", cb->rdma_buf);\r
+\r
+ /* Tell client to continue */\r
+ ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
+ if (ret) {\r
+ fprintf(stderr, "post send error %d\n", ret);\r
+ break;\r
+ }\r
+ DEBUG_LOG("server posted go ahead\n");\r
+\r
+ /* Wait for client's RDMA STAG/TO/Len */\r
+ cq_thread(cb);\r
+// sem_wait(&cb->sem);\r
+ if (cb->state != RDMA_WRITE_ADV) {\r
+ fprintf(stderr, "wait for RDMA_WRITE_ADV state %d\n",\r
+ cb->state);\r
+ ret = -1;\r
+ break;\r
+ }\r
+ DEBUG_LOG("server received sink adv\n");\r
+\r
+ /* RDMA Write echo data */\r
+ cb->rdma_sq_wr.opcode = IBV_WR_RDMA_WRITE;\r
+ cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;\r
+ cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;\r
+ cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1;\r
+ DEBUG_LOG("rdma write from lkey %x laddr %" PRIx64 " len %d\n",\r
+ cb->rdma_sq_wr.sg_list->lkey,\r
+ cb->rdma_sq_wr.sg_list->addr,\r
+ cb->rdma_sq_wr.sg_list->length);\r
+\r
+ ret = ibv_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);\r
+ if (ret) {\r
+ fprintf(stderr, "post send error %d\n", ret);\r
+ break;\r
+ }\r
+\r
+ /* Wait for completion */\r
+ cq_thread(cb);\r
+// ret = sem_wait(&cb->sem);\r
+ if (cb->state != RDMA_WRITE_COMPLETE) {\r
+ fprintf(stderr, "wait for RDMA_WRITE_COMPLETE state %d\n",\r
+ cb->state);\r
+ ret = -1;\r
+ break;\r
+ }\r
+ DEBUG_LOG("server rdma write complete \n");\r
+\r
+ /* Tell client to begin again */\r
+ ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
+ if (ret) {\r
+ fprintf(stderr, "post send error %d\n", ret);\r
+ break;\r
+ }\r
+ DEBUG_LOG("server posted go ahead\n");\r
+ }\r
+\r
+ return ret;\r
+}\r
+\r
+static int rping_bind_server(struct rping_cb *cb)\r
+{\r
+ int ret;\r
+\r
+ cb->sin.sin_port = cb->port;\r
+ ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &cb->sin);\r
+ if (ret) {\r
+ fprintf(stderr, "rdma_bind_addr error %d\n", ret);\r
+ return ret;\r
+ }\r
+ DEBUG_LOG("rdma_bind_addr successful\n");\r
+\r
+ DEBUG_LOG("rdma_listen\n");\r
+ ret = rdma_listen(cb->cm_id, 3);\r
+ if (ret) {\r
+ fprintf(stderr, "rdma_listen failed: %d\n", ret);\r
+ return ret;\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+/*\r
+ * Allocate a copy of the listening control block and make the copied\r
+ * child cm id's context point at the copy.\r
+ *\r
+ * Returns the new control block, or NULL if the allocation fails.\r
+ */\r
+static struct rping_cb *clone_cb(struct rping_cb *listening_cb)\r
+{\r
+	struct rping_cb *copy = malloc(sizeof(*copy));\r
+\r
+	if (copy != NULL) {\r
+		memcpy(copy, listening_cb, sizeof(*copy));\r
+		copy->child_cm_id->context = copy;\r
+	}\r
+	return copy;\r
+}\r
+\r
+static void free_cb(struct rping_cb *cb)\r
+{ /* Counterpart of clone_cb(). */\r
+ free(cb); /* releases only the control block itself, nothing it points to */\r
+}\r
+\r
+static int rping_run_server(struct rping_cb *cb)\r
+{\r
+ struct ibv_recv_wr *bad_wr;\r
+ int ret;\r
+\r
+ ret = rping_bind_server(cb);\r
+ if (ret)\r
+ return ret;\r
+\r
+ cm_thread(cb);\r
+// sem_wait(&cb->sem);\r
+ if (cb->state != CONNECT_REQUEST) {\r
+ fprintf(stderr, "wait for CONNECT_REQUEST state %d\n",\r
+ cb->state);\r
+ return -1;\r
+ }\r
+\r
+ ret = rping_setup_qp(cb, cb->child_cm_id);\r
+ if (ret) {\r
+ fprintf(stderr, "setup_qp failed: %d\n", ret);\r
+ return ret;\r
+ }\r
+\r
+ ret = rping_setup_buffers(cb);\r
+ if (ret) {\r
+ fprintf(stderr, "rping_setup_buffers failed: %d\n", ret);\r
+ goto err1;\r
+ }\r
+\r
+ ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);\r
+ if (ret) {\r
+ fprintf(stderr, "ibv_post_recv failed: %d\n", ret);\r
+ goto err2;\r
+ }\r
+\r
+// pthread_create(&cb->cqthread, NULL, cq_thread, cb);\r
+\r
+ ret = rping_accept(cb);\r
+ if (ret) {\r
+ fprintf(stderr, "connect error %d\n", ret);\r
+ goto err2;\r
+ }\r
+\r
+ rping_test_server(cb);\r
+ rdma_disconnect(cb->child_cm_id);\r
+ rdma_destroy_id(cb->child_cm_id);\r
+err2:\r
+ rping_free_buffers(cb);\r
+err1:\r
+ rping_free_qp(cb);\r
+\r
+ return ret;\r
+}\r
+\r
+/*\r
+ * Client half of the ping/pong loop.  For every ping: fill the start\r
+ * buffer with a numbered ASCII pattern, advertise it, wait for the\r
+ * server's go-ahead, advertise the sink buffer, wait for the second\r
+ * go-ahead, then optionally validate and print the echoed data.\r
+ *\r
+ * Runs cb->count pings, or forever when cb->count is 0.\r
+ * Returns 0 on success, the failing post's error code, or -1 for an\r
+ * unexpected state or a data mismatch.\r
+ */\r
+static int rping_test_client(struct rping_cb *cb)\r
+{\r
+	int ping, start, cc, i, ret = 0;\r
+	struct ibv_send_wr *bad_wr;\r
+	unsigned char c;\r
+	char hdr[32];	/* holds RPING_MSG_FMT expanded with any int */\r
+\r
+	start = 65;\r
+	for (ping = 0; !cb->count || ping < cb->count; ping++) {\r
+		cb->state = RDMA_READ_ADV;\r
+\r
+		/*\r
+		 * Put some ascii text in the buffer.  The header is built in\r
+		 * a scratch array and copied with a bound, so a small buffer\r
+		 * combined with a many-digit ping number cannot overflow\r
+		 * start_buf.\r
+		 */\r
+		cc = sprintf(hdr, RPING_MSG_FMT, ping);\r
+		if (cc >= cb->size)\r
+			cc = cb->size - 1;\r
+		memcpy(cb->start_buf, hdr, cc);\r
+		for (i = cc, c = start; i < cb->size; i++) {\r
+			cb->start_buf[i] = c;\r
+			c++;\r
+			if (c > 122)\r
+				c = 65;\r
+		}\r
+		start++;\r
+		if (start > 122)\r
+			start = 65;\r
+		cb->start_buf[cb->size - 1] = 0;\r
+\r
+		rping_format_send(cb, cb->start_buf, cb->start_mr);\r
+		ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
+		if (ret) {\r
+			fprintf(stderr, "post send error %d\n", ret);\r
+			break;\r
+		}\r
+\r
+		/* Wait for server to ACK */\r
+		cq_thread(cb);\r
+		if (cb->state != RDMA_WRITE_ADV) {\r
+			fprintf(stderr, "wait for RDMA_WRITE_ADV state %d\n",\r
+				cb->state);\r
+			ret = -1;\r
+			break;\r
+		}\r
+\r
+		rping_format_send(cb, cb->rdma_buf, cb->rdma_mr);\r
+		ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
+		if (ret) {\r
+			fprintf(stderr, "post send error %d\n", ret);\r
+			break;\r
+		}\r
+\r
+		/* Wait for the server to say the RDMA Write is complete. */\r
+		cq_thread(cb);\r
+		if (cb->state != RDMA_WRITE_COMPLETE) {\r
+			fprintf(stderr, "wait for RDMA_WRITE_COMPLETE state %d\n",\r
+				cb->state);\r
+			ret = -1;\r
+			break;\r
+		}\r
+\r
+		if (cb->validate)\r
+			if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {\r
+				fprintf(stderr, "data mismatch!\n");\r
+				ret = -1;\r
+				break;\r
+			}\r
+\r
+		if (cb->verbose)\r
+			printf("ping data: %s\n", cb->rdma_buf);\r
+	}\r
+\r
+	return ret;\r
+}\r
+\r
+static int rping_connect_client(struct rping_cb *cb)\r
+{\r
+ struct rdma_conn_param conn_param;\r
+ int ret;\r
+\r
+ memset(&conn_param, 0, sizeof conn_param);\r
+ conn_param.responder_resources = 1;\r
+ conn_param.initiator_depth = 1;\r
+ conn_param.retry_count = 10;\r
+\r
+ ret = rdma_connect(cb->cm_id, &conn_param);\r
+ if (ret) {\r
+ fprintf(stderr, "rdma_connect error %d\n", ret);\r
+ return ret;\r
+ }\r
+\r
+ cm_thread(cb);\r
+// sem_wait(&cb->sem);\r
+ if (cb->state != CONNECTED) {\r
+ fprintf(stderr, "wait for CONNECTED state %d\n", cb->state);\r
+ return -1;\r
+ }\r
+\r
+ DEBUG_LOG("rmda_connect successful\n");\r
+ return 0;\r
+}\r
+\r
+/*\r
+ * Resolve the server's address and the route to it.\r
+ *\r
+ * Returns 0 once the state is ROUTE_RESOLVED, the rdma_resolve_addr()\r
+ * error code, or -1 if resolution did not complete.\r
+ */\r
+static int rping_bind_client(struct rping_cb *cb)\r
+{\r
+	int ret;\r
+\r
+	cb->sin.sin_port = cb->port;\r
+	ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &cb->sin, 2000);\r
+	if (ret) {\r
+		fprintf(stderr, "rdma_resolve_addr error %d\n", ret);\r
+		return ret;\r
+	}\r
+\r
+	/*\r
+	 * Resolution takes two events.  The handler answers ADDR_RESOLVED by\r
+	 * starting route resolution, and its result arrives as a separate\r
+	 * event, so one more event is pumped when only the address has been\r
+	 * resolved so far.\r
+	 */\r
+	cm_thread(cb);\r
+	if (cb->state == ADDR_RESOLVED)\r
+		cm_thread(cb);\r
+	if (cb->state != ROUTE_RESOLVED) {\r
+		fprintf(stderr, "waiting for addr/route resolution state %d\n",\r
+			cb->state);\r
+		return -1;\r
+	}\r
+\r
+	DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n");\r
+	return 0;\r
+}\r
+\r
+/*\r
+ * Run the client: resolve the server, set up the queue pair and buffers,\r
+ * connect, run the ping loop, disconnect and tear everything down.\r
+ *\r
+ * Returns 0 when every ping succeeded, otherwise the error code of the\r
+ * failing step, including a failure reported by the ping loop itself.\r
+ */\r
+static int rping_run_client(struct rping_cb *cb)\r
+{\r
+	struct ibv_recv_wr *bad_wr;\r
+	int ret;\r
+\r
+	ret = rping_bind_client(cb);\r
+	if (ret)\r
+		return ret;\r
+\r
+	ret = rping_setup_qp(cb, cb->cm_id);\r
+	if (ret) {\r
+		fprintf(stderr, "setup_qp failed: %d\n", ret);\r
+		return ret;\r
+	}\r
+\r
+	ret = rping_setup_buffers(cb);\r
+	if (ret) {\r
+		fprintf(stderr, "rping_setup_buffers failed: %d\n", ret);\r
+		goto err1;\r
+	}\r
+\r
+	/* Post the receive before connecting so no message can be missed. */\r
+	ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);\r
+	if (ret) {\r
+		fprintf(stderr, "ibv_post_recv failed: %d\n", ret);\r
+		goto err2;\r
+	}\r
+\r
+	ret = rping_connect_client(cb);\r
+	if (ret) {\r
+		fprintf(stderr, "connect error %d\n", ret);\r
+		goto err2;\r
+	}\r
+\r
+	/* Keep the test result so failed pings reach the exit status. */\r
+	ret = rping_test_client(cb);\r
+	if (ret)\r
+		fprintf(stderr, "rping client failed: %d\n", ret);\r
+	rdma_disconnect(cb->cm_id);\r
+err2:\r
+	rping_free_buffers(cb);\r
+err1:\r
+	rping_free_qp(cb);\r
+\r
+	return ret;\r
+}\r
+\r
+/*\r
+ * Resolve the host name or address string dst to an IPv4 socket address\r
+ * and store it in *addr.\r
+ *\r
+ * Returns 0 on success, the getaddrinfo() error code if resolution fails,\r
+ * or -1 if the result is not an IPv4 address.\r
+ */\r
+static int get_addr(char *dst, struct sockaddr_in *addr)\r
+{\r
+	struct addrinfo hints;\r
+	struct addrinfo *res;\r
+	int ret;\r
+\r
+	/*\r
+	 * Ask for IPv4 only.  Without hints the first result may be an IPv6\r
+	 * address, which would be rejected below even though the host also\r
+	 * has an IPv4 address.\r
+	 */\r
+	memset(&hints, 0, sizeof hints);\r
+	hints.ai_family = PF_INET;\r
+\r
+	ret = getaddrinfo(dst, NULL, &hints, &res);\r
+	if (ret) {\r
+		printf("getaddrinfo failed - invalid hostname or IP address\n");\r
+		return ret;\r
+	}\r
+\r
+	if (res->ai_family != PF_INET) {\r
+		ret = -1;\r
+		goto out;\r
+	}\r
+\r
+	*addr = *(struct sockaddr_in *) res->ai_addr;\r
+out:\r
+	freeaddrinfo(res);\r
+	return ret;\r
+}\r
+\r
+/* Print the command-line synopsis and option list to stdout. */\r
+static void usage()\r
+{\r
+	static const char *const lines[] = {\r
+		"rdma_rping -s [-vVd] [-S size] [-C count] [-a addr] [-p port]\n",\r
+		"rdma_rping -c [-vVd] [-S size] [-C count] -a addr [-p port]\n",\r
+		"\t-c\t\tclient side\n",\r
+		"\t-s\t\tserver side\n",\r
+		"\t-v\t\tdisplay ping data to stdout\n",\r
+		"\t-V\t\tvalidate ping data\n",\r
+		"\t-d\t\tdebug printfs\n",\r
+		"\t-S size \tping data size\n",\r
+		"\t-C count\tping count times\n",\r
+		"\t-a addr\t\taddress\n",\r
+		"\t-p port\t\tport\n",\r
+	};\r
+	size_t i;\r
+\r
+	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)\r
+		printf("%s", lines[i]);\r
+}\r
+\r
+int main(int argc, char *argv[])\r
+{\r
+// struct rping_cb *cb;\r
+ int op;\r
+ int ret = 0;\r
+\r
+ cb = malloc(sizeof(*cb));\r
+ if (!cb)\r
+ return -ENOMEM;\r
+\r
+ memset(cb, 0, sizeof(*cb));\r
+ cb->server = -1;\r
+ cb->state = IDLE;\r
+ cb->size = 64;\r
+ cb->sin.sin_family = PF_INET;\r
+ cb->port = htons(7174);\r
+// sem_init(&cb->sem, 0, 0);\r
+\r
+ opterr = 0;\r
+ while ((op=getopt(argc, argv, "a:Pp:C:S:t:scvVd")) != -1) {\r
+ switch (op) {\r
+ case 'a':\r
+ ret = get_addr(optarg, &cb->sin);\r
+ break;\r
+ case 'p':\r
+ cb->port = htons(atoi(optarg));\r
+ DEBUG_LOG("port %d\n", (int) atoi(optarg));\r
+ break;\r
+ case 's':\r
+ cb->server = 1;\r
+ DEBUG_LOG("server\n");\r
+ break;\r
+ case 'c':\r
+ cb->server = 0;\r
+ DEBUG_LOG("client\n");\r
+ break;\r
+ case 'S':\r
+ cb->size = atoi(optarg);\r
+ if (cb->size < RPING_MIN_BUFSIZE) {\r
+ fprintf(stderr, "Invalid size (minimum is %d) " RPING_MIN_BUFSIZE);\r
+ ret = EINVAL;\r
+ } else\r
+ DEBUG_LOG("size %d\n", (int) atoi(optarg));\r
+ break;\r
+ case 'C':\r
+ cb->count = atoi(optarg);\r
+ if (cb->count < 0) {\r
+ fprintf(stderr, "Invalid count %d\n", cb->count);\r
+ ret = EINVAL;\r
+ } else\r
+ DEBUG_LOG("count %d\n", (int) cb->count);\r
+ break;\r
+ case 'v':\r
+ cb->verbose++;\r
+ DEBUG_LOG("verbose\n");\r
+ break;\r
+ case 'V':\r
+ cb->validate++;\r
+ DEBUG_LOG("validate data\n");\r
+ break;\r
+ case 'd':\r
+ debug++;\r
+ break;\r
+ default:\r
+ usage();\r
+ ret = EINVAL;\r
+ goto out;\r
+ }\r
+ }\r
+ if (ret)\r
+ goto out;\r
+\r
+ if (cb->server == -1) {\r
+ usage();\r
+ ret = EINVAL;\r
+ goto out;\r
+ }\r
+\r
+ cb->cm_channel = rdma_create_event_channel();\r
+ if (!cb->cm_channel) {\r
+ ret = errno;\r
+ fprintf(stderr, "rdma_create_event_channel error %d\n", ret);\r
+ goto out;\r
+ }\r
+\r
+ ret = rdma_create_id(cb->cm_channel, &cb->cm_id, cb, RDMA_PS_TCP);\r
+ if (ret) {\r
+ ret = errno;\r
+ fprintf(stderr, "rdma_create_id error %d\n", ret);\r
+ goto out2;\r
+ }\r
+ DEBUG_LOG("created cm_id %p\n", cb->cm_id);\r
+\r
+// pthread_create(&cb->cmthread, NULL, cm_thread, cb);\r
+\r
+ if (cb->server)\r
+ ret = rping_run_server(cb);\r
+ else\r
+ ret = rping_run_client(cb);\r
+\r
+ DEBUG_LOG("destroy cm_id %p\n", cb->cm_id);\r
+ rdma_destroy_id(cb->cm_id);\r
+out2:\r
+ rdma_destroy_event_channel(cb->cm_channel);\r
+out:\r
+ free(cb);\r
+ return ret;\r
+}\r