From: shefty Date: Sat, 24 Jan 2009 19:58:23 +0000 (+0000) Subject: librdmacm: provide OFED compatibility library. X-Git-Url: http://git.etherboot.org/mirror/winof/.git/commitdiff_plain/4d7718b70cea78187472aff9ef9345bb06e3d349 librdmacm: provide OFED compatibility library. Provide a port of librdmacm. Because of the use of overlapped structures and events, the library has a scalability limitation of about 60 connections. The scalability limit will be addressed in a subsequent version, once the needs of a real application are determined. Signed-off-by: Sean Hefty git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1879 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- diff --git a/ulp/dirs b/ulp/dirs index 9de2e840..3cf61e2e 100644 --- a/ulp/dirs +++ b/ulp/dirs @@ -1,5 +1,4 @@ DIRS = \ - opensm \ dapl \ dapl2 \ ipoib \ @@ -9,4 +8,5 @@ DIRS = \ libibverbs \ libibumad \ libibmad \ + librdmacm \ nd diff --git a/ulp/librdmacm/AUTHORS b/ulp/librdmacm/AUTHORS new file mode 100644 index 00000000..589c93d1 --- /dev/null +++ b/ulp/librdmacm/AUTHORS @@ -0,0 +1 @@ +Sean Hefty diff --git a/ulp/librdmacm/COPYING b/ulp/librdmacm/COPYING new file mode 100644 index 00000000..41367600 --- /dev/null +++ b/ulp/librdmacm/COPYING @@ -0,0 +1,26 @@ +Copyright (c) 2008 Intel Corporation. All rights reserved. + +This software is available to you under the OpenFabrics.org BSD license +below: + + Redistribution and use in source and binary forms, with or + without modification, are permitted provided that the following + conditions are met: + + - Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + - Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ulp/librdmacm/dirs b/ulp/librdmacm/dirs new file mode 100644 index 00000000..b1051907 --- /dev/null +++ b/ulp/librdmacm/dirs @@ -0,0 +1,3 @@ +DIRS = \ + src \ + examples diff --git a/ulp/librdmacm/examples/cmatose/SOURCES b/ulp/librdmacm/examples/cmatose/SOURCES new file mode 100644 index 00000000..cfe59ff2 --- /dev/null +++ b/ulp/librdmacm/examples/cmatose/SOURCES @@ -0,0 +1,30 @@ +TARGETNAME = rdma_cmatose +TARGETPATH = ..\..\..\..\bin\user\obj$(BUILD_ALT_DIR) +TARGETTYPE = PROGRAM + +UMTYPE = console +UMENTRY = main + +USE_MSVCRT = 1 +USE_STL = 1 +USE_NATIVE_EH = 1 +USE_IOSTREAM = 1 + +SOURCES = cmatose.c + +INCLUDES = ..;..\..\include;..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include; + +TARGETLIBS = \ + $(SDK_LIB_PATH)\kernel32.lib \ + $(SDK_LIB_PATH)\advapi32.lib \ + $(SDK_LIB_PATH)\user32.lib \ + $(SDK_LIB_PATH)\ole32.lib \ + $(SDK_LIB_PATH)\ws2_32.lib \ +!if $(FREEBUILD) + $(TARGETPATH)\*\libibverbs.lib \ + $(TARGETPATH)\*\librdmacm.lib +!else + $(TARGETPATH)\*\libibverbsd.lib \ + $(TARGETPATH)\*\librdmacmd.lib +!endif + diff --git a/ulp/librdmacm/examples/cmatose/cmatose.c b/ulp/librdmacm/examples/cmatose/cmatose.c new file mode 100644 index 00000000..7f32f27f --- /dev/null +++ b/ulp/librdmacm/examples/cmatose/cmatose.c @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2005-2009 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "..\..\..\..\etc\user\getopt.c" +#include + +struct cmatest_node { + int id; + struct rdma_cm_id *cma_id; + int connected; + struct ibv_pd *pd; + struct ibv_cq *cq[2]; + struct ibv_mr *mr; + void *mem; +}; + +enum cq_index { + SEND_CQ_INDEX, + RECV_CQ_INDEX +}; + +struct cmatest { + struct rdma_event_channel *channel; + struct cmatest_node *nodes; + int conn_index; + int connects_left; + int disconnects_left; + + struct sockaddr_in dst_in; + struct sockaddr *dst_addr; + struct sockaddr_in src_in; + struct sockaddr *src_addr; +}; + +static struct cmatest test; +static int connections = 1; +static int message_size = 100; +static int message_count = 10; +static uint16_t port = 7471; +static uint8_t set_tos = 0; +static uint8_t tos; +static uint8_t migrate = 0; +static char *dst_addr; +static char *src_addr; + +static int create_message(struct cmatest_node *node) +{ + if (!message_size) + message_count = 0; + + if (!message_count) + return 0; + + node->mem = malloc(message_size); + if (!node->mem) { + printf("failed message allocation\n"); + return -1; + } + node->mr = ibv_reg_mr(node->pd, node->mem, message_size, + IBV_ACCESS_LOCAL_WRITE); + if (!node->mr) { + printf("failed to reg MR\n"); + goto err; + } + return 0; +err: + free(node->mem); + return -1; +} + +static int init_node(struct cmatest_node *node) +{ + struct ibv_qp_init_attr init_qp_attr; + int cqe, ret; + + node->pd = ibv_alloc_pd(node->cma_id->verbs); + if (!node->pd) { + ret = -1; + printf("cmatose: unable to allocate PD\n"); + goto out; + } + + cqe = message_count ? 
message_count : 1; + node->cq[SEND_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0); + node->cq[RECV_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0); + if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) { + ret = -1; + printf("cmatose: unable to create CQs\n"); + goto out; + } + + memset(&init_qp_attr, 0, sizeof init_qp_attr); + init_qp_attr.cap.max_send_wr = cqe; + init_qp_attr.cap.max_recv_wr = cqe; + init_qp_attr.cap.max_send_sge = 1; + init_qp_attr.cap.max_recv_sge = 1; + init_qp_attr.qp_context = node; + init_qp_attr.sq_sig_all = 1; + init_qp_attr.qp_type = IBV_QPT_RC; + init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX]; + init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX]; + ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr); + if (ret) { + printf("cmatose: unable to create QP: 0x%x\n", ret); + goto out; + } + + ret = create_message(node); + if (ret) { + printf("cmatose: failed to create messages: 0x%x\n", ret); + goto out; + } +out: + return ret; +} + +static int post_recvs(struct cmatest_node *node) +{ + struct ibv_recv_wr recv_wr, *recv_failure; + struct ibv_sge sge; + int i, ret = 0; + + if (!message_count) + return 0; + + recv_wr.next = NULL; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; + recv_wr.wr_id = (uintptr_t) node; + + sge.length = message_size; + sge.lkey = node->mr->lkey; + sge.addr = (uintptr_t) node->mem; + + for (i = 0; i < message_count && !ret; i++ ) { + ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure); + if (ret) { + printf("failed to post receives: 0x%x\n", ret); + break; + } + } + return ret; +} + +static int post_sends(struct cmatest_node *node) +{ + struct ibv_send_wr send_wr, *bad_send_wr; + struct ibv_sge sge; + int i, ret = 0; + + if (!node->connected || !message_count) + return 0; + + send_wr.next = NULL; + send_wr.sg_list = &sge; + send_wr.num_sge = 1; + send_wr.opcode = IBV_WR_SEND; + send_wr.send_flags = 0; + send_wr.wr_id = (ULONG_PTR) node; + + sge.length = 
message_size; + sge.lkey = node->mr->lkey; + sge.addr = (uintptr_t) node->mem; + + for (i = 0; i < message_count && !ret; i++) { + ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr); + if (ret) + printf("failed to post sends: 0x%x\n", ret); + } + return ret; +} + +static void connect_error(void) +{ + test.disconnects_left--; + test.connects_left--; +} + +static int addr_handler(struct cmatest_node *node) +{ + int ret; + + if (set_tos) { + ret = rdma_set_option(node->cma_id, RDMA_OPTION_ID, + RDMA_OPTION_ID_TOS, &tos, sizeof tos); + if (ret) + printf("cmatose: set TOS option failed: 0x%x\n", ret); + } + + ret = rdma_resolve_route(node->cma_id, 2000); + if (ret) { + printf("cmatose: resolve route failed: 0x%x\n", ret); + connect_error(); + } + return ret; +} + +static int route_handler(struct cmatest_node *node) +{ + struct rdma_conn_param conn_param; + int ret; + + ret = init_node(node); + if (ret) + goto err; + + ret = post_recvs(node); + if (ret) + goto err; + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 1; + conn_param.initiator_depth = 1; + conn_param.retry_count = 5; + ret = rdma_connect(node->cma_id, &conn_param); + if (ret) { + printf("cmatose: failure connecting: 0x%x\n", ret); + goto err; + } + return 0; +err: + connect_error(); + return ret; +} + +static int connect_handler(struct rdma_cm_id *cma_id) +{ + struct cmatest_node *node; + struct rdma_conn_param conn_param; + int ret; + + if (test.conn_index == connections) { + ret = -1; + goto err1; + } + node = &test.nodes[test.conn_index++]; + + node->cma_id = cma_id; + cma_id->context = node; + + ret = init_node(node); + if (ret) + goto err2; + + ret = post_recvs(node); + if (ret) + goto err2; + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 1; + conn_param.initiator_depth = 1; + ret = rdma_accept(node->cma_id, &conn_param); + if (ret) { + printf("cmatose: failure accepting: 0x%x\n", ret); + goto err2; + } + return 0; + +err2: + 
node->cma_id = NULL; + connect_error(); +err1: + printf("cmatose: failing connection request\n"); + rdma_reject(cma_id, NULL, 0); + return ret; +} + +static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) +{ + int ret = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + ret = addr_handler(cma_id->context); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + ret = route_handler(cma_id->context); + break; + case RDMA_CM_EVENT_CONNECT_REQUEST: + ret = connect_handler(cma_id); + break; + case RDMA_CM_EVENT_ESTABLISHED: + ((struct cmatest_node *) cma_id->context)->connected = 1; + test.connects_left--; + break; + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + printf("cmatose: event: %s, error: 0x%x\n", + rdma_event_str(event->event), event->status); + connect_error(); + break; + case RDMA_CM_EVENT_DISCONNECTED: + rdma_disconnect(cma_id); + test.disconnects_left--; + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* Cleanup will occur after test completes. 
*/ + break; + default: + break; + } + return ret; +} + +static void destroy_node(struct cmatest_node *node) +{ + if (!node->cma_id) + return; + + if (node->cma_id->qp) + rdma_destroy_qp(node->cma_id); + + if (node->cq[SEND_CQ_INDEX]) + ibv_destroy_cq(node->cq[SEND_CQ_INDEX]); + + if (node->cq[RECV_CQ_INDEX]) + ibv_destroy_cq(node->cq[RECV_CQ_INDEX]); + + if (node->mem) { + ibv_dereg_mr(node->mr); + free(node->mem); + } + + if (node->pd) + ibv_dealloc_pd(node->pd); + + /* Destroy the RDMA ID after all device resources */ + rdma_destroy_id(node->cma_id); +} + +static int alloc_nodes(void) +{ + int ret, i; + + test.nodes = malloc(sizeof *test.nodes * connections); + if (!test.nodes) { + printf("cmatose: unable to allocate memory for test nodes\n"); + return -1; + } + memset(test.nodes, 0, sizeof *test.nodes * connections); + + for (i = 0; i < connections; i++) { + test.nodes[i].id = i; + if (dst_addr) { + ret = rdma_create_id(test.channel, + &test.nodes[i].cma_id, + &test.nodes[i], RDMA_PS_TCP); + if (ret) + goto err; + } + } + return 0; +err: + while (--i >= 0) + rdma_destroy_id(test.nodes[i].cma_id); + free(test.nodes); + return ret; +} + +static void destroy_nodes(void) +{ + int i; + + for (i = 0; i < connections; i++) + destroy_node(&test.nodes[i]); + free(test.nodes); +} + +static int poll_cqs(enum cq_index index) /* index picks SEND_CQ_INDEX or RECV_CQ_INDEX in node->cq[] */ +{ + struct ibv_wc wc[8]; + int done, i, ret; + + for (i = 0; i < connections; i++) { + if (!test.nodes[i].connected) + continue; + + for (done = 0; done < message_count; done += ret) { + ret = ibv_poll_cq(test.nodes[i].cq[index], 8, wc); + if (ret < 0) { + printf("cmatose: failed polling CQ: 0x%x\n", ret); + return ret; + } + } + } + return 0; +} + +static int connect_events(void) +{ + struct rdma_cm_event *event; + int err = 0, ret = 0; + + while (test.connects_left && !err) { + err = rdma_get_cm_event(test.channel, &event); + if (!err) { + cma_handler(event->id, event); + rdma_ack_cm_event(event); + } else { + printf("cmatose: failure in 
rdma_get_cm_event in connect events\n"); + ret = err; + } + } + + return ret; +} + +static int disconnect_events(void) +{ + struct rdma_cm_event *event; + int err = 0, ret = 0; + + while (test.disconnects_left && !err) { + err = rdma_get_cm_event(test.channel, &event); + if (!err) { + cma_handler(event->id, event); + rdma_ack_cm_event(event); + } else { + printf("cmatose: failure in rdma_get_cm_event in disconnect events\n"); + ret = err; + } + } + + return ret; +} + +static int migrate_channel(struct rdma_cm_id *listen_id) +{ + struct rdma_event_channel *channel; + int i, ret; + + printf("migrating to new event channel\n"); + + channel = rdma_create_event_channel(); + if (!channel) { + printf("cmatose: failed to create event channel\n"); + return -1; + } + + ret = 0; + if (listen_id) + ret = rdma_migrate_id(listen_id, channel); + + for (i = 0; i < connections && !ret; i++) + ret = rdma_migrate_id(test.nodes[i].cma_id, channel); + + if (!ret) { + rdma_destroy_event_channel(test.channel); + test.channel = channel; + } else + printf("cmatose: failure migrating to channel: 0x%x\n", ret); + + return ret; +} + +static int get_addr(char *dst, struct sockaddr_in *addr) +{ + struct addrinfo *res; + int ret; + + ret = getaddrinfo(dst, NULL, NULL, &res); + if (ret) { + printf("getaddrinfo failed - invalid hostname or IP address\n"); + return ret; + } + + if (res->ai_family != PF_INET) { + ret = -1; + goto out; + } + + *addr = *(struct sockaddr_in *) res->ai_addr; +out: + freeaddrinfo(res); + return ret; +} + +static int run_server(void) +{ + struct rdma_cm_id *listen_id; + int i, ret; + + printf("cmatose: starting server\n"); + ret = rdma_create_id(test.channel, &listen_id, &test, RDMA_PS_TCP); + if (ret) { + printf("cmatose: listen request failed\n"); + return ret; + } + + if (src_addr) { + ret = get_addr(src_addr, &test.src_in); + if (ret) + goto out; + } else + test.src_in.sin_family = PF_INET; + + test.src_in.sin_port = port; + ret = rdma_bind_addr(listen_id, 
test.src_addr); + if (ret) { + printf("cmatose: bind address failed: 0x%x\n", ret); + goto out; + } + + ret = rdma_listen(listen_id, 0); + if (ret) { + printf("cmatose: failure trying to listen: 0x%x\n", ret); + goto out; + } + + ret = connect_events(); + if (ret) + goto out; + + if (message_count) { + printf("initiating data transfers\n"); + for (i = 0; i < connections; i++) { + ret = post_sends(&test.nodes[i]); + if (ret) + goto out; + } + + printf("completing sends\n"); + ret = poll_cqs(SEND_CQ_INDEX); + if (ret) + goto out; + + printf("receiving data transfers\n"); + ret = poll_cqs(RECV_CQ_INDEX); + if (ret) + goto out; + printf("data transfers complete\n"); + + } + + if (migrate) { + ret = migrate_channel(listen_id); + if (ret) + goto out; + } + + printf("cmatose: disconnecting\n"); + for (i = 0; i < connections; i++) { + if (!test.nodes[i].connected) + continue; + + test.nodes[i].connected = 0; + rdma_disconnect(test.nodes[i].cma_id); + } + + ret = disconnect_events(); + + printf("disconnected\n"); + +out: + rdma_destroy_id(listen_id); + return ret; +} + +static int run_client(void) +{ + int i, ret, ret2; + + printf("cmatose: starting client\n"); + if (src_addr) { + ret = get_addr(src_addr, &test.src_in); + if (ret) + return ret; + } + + ret = get_addr(dst_addr, &test.dst_in); + if (ret) + return ret; + + test.dst_in.sin_port = port; + + printf("cmatose: connecting\n"); + for (i = 0; i < connections; i++) { + ret = rdma_resolve_addr(test.nodes[i].cma_id, + src_addr ? 
test.src_addr : NULL, + test.dst_addr, 2000); + if (ret) { + printf("cmatose: failure getting addr: 0x%x\n", ret); + connect_error(); + return ret; + } + } + + ret = connect_events(); + if (ret) + goto disc; + + if (message_count) { + printf("receiving data transfers\n"); + ret = poll_cqs(RECV_CQ_INDEX); + if (ret) + goto disc; + + printf("sending replies\n"); + for (i = 0; i < connections; i++) { + ret = post_sends(&test.nodes[i]); + if (ret) + goto disc; + } + + printf("data transfers complete\n"); + } + + ret = 0; + + if (migrate) { + ret = migrate_channel(NULL); + if (ret) + goto out; + } +disc: + ret2 = disconnect_events(); + if (ret2) + ret = ret2; +out: + return ret; +} + +int __cdecl main(int argc, char **argv) +{ + int op, ret; + + while ((op = getopt(argc, argv, "s:b:c:C:S:t:p:m")) != -1) { + switch (op) { + case 's': + dst_addr = optarg; + break; + case 'b': + src_addr = optarg; + break; + case 'c': + connections = atoi(optarg); + break; + case 'C': + message_count = atoi(optarg); + break; + case 'S': + message_size = atoi(optarg); + break; + case 't': + set_tos = 1; + tos = (uint8_t) atoi(optarg); + break; + case 'p': + port = (uint16_t) atoi(optarg); + break; + case 'm': + migrate = 1; + break; + default: + printf("usage: %s\n", argv[0]); + printf("\t[-s server_address]\n"); + printf("\t[-b bind_address]\n"); + printf("\t[-c connections]\n"); + printf("\t[-C message_count]\n"); + printf("\t[-S message_size]\n"); + printf("\t[-t type_of_service]\n"); + printf("\t[-p port_number]\n"); + printf("\t[-m(igrate)]\n"); + exit(1); + } + } + + test.dst_addr = (struct sockaddr *) &test.dst_in; + test.src_addr = (struct sockaddr *) &test.src_in; + test.connects_left = connections; + test.disconnects_left = connections; + + test.channel = rdma_create_event_channel(); + if (!test.channel) { + printf("failed to create event channel\n"); + exit(1); + } + + if (alloc_nodes()) + exit(1); + + if (dst_addr) + ret = run_client(); + else + ret = run_server(); + + 
printf("test complete\n"); + destroy_nodes(); + rdma_destroy_event_channel(test.channel); + + printf("return status 0x%x\n", ret); + return ret; +} diff --git a/ulp/librdmacm/examples/cmatose/makefile b/ulp/librdmacm/examples/cmatose/makefile new file mode 100644 index 00000000..a0c06273 --- /dev/null +++ b/ulp/librdmacm/examples/cmatose/makefile @@ -0,0 +1,7 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the driver components of the OpenIB Windows project. +# + +!INCLUDE ..\..\..\..\inc\openib.def diff --git a/ulp/librdmacm/examples/dirs b/ulp/librdmacm/examples/dirs new file mode 100644 index 00000000..2bdf906e --- /dev/null +++ b/ulp/librdmacm/examples/dirs @@ -0,0 +1,2 @@ +DIRS = \ + cmatose diff --git a/ulp/librdmacm/examples/mckey/mckey.c b/ulp/librdmacm/examples/mckey/mckey.c new file mode 100644 index 00000000..a858ad6b --- /dev/null +++ b/ulp/librdmacm/examples/mckey/mckey.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2005-2007 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct cmatest_node { + int id; + struct rdma_cm_id *cma_id; + int connected; + struct ibv_pd *pd; + struct ibv_cq *cq; + struct ibv_mr *mr; + struct ibv_ah *ah; + uint32_t remote_qpn; + uint32_t remote_qkey; + void *mem; +}; + +struct cmatest { + struct rdma_event_channel *channel; + struct cmatest_node *nodes; + int conn_index; + int connects_left; + + struct sockaddr_in6 dst_in; + struct sockaddr *dst_addr; + struct sockaddr_in6 src_in; + struct sockaddr *src_addr; +}; + +static struct cmatest test; +static int connections = 1; +static int message_size = 100; +static int message_count = 10; +static int is_sender; +static int unmapped_addr; +static char *dst_addr; +static char *src_addr; +static enum rdma_port_space port_space = RDMA_PS_UDP; + +static int create_message(struct cmatest_node *node) +{ + if (!message_size) + message_count = 0; + + if (!message_count) + return 0; + + node->mem = malloc(message_size + sizeof(struct ibv_grh)); + if (!node->mem) { + printf("failed message allocation\n"); + return -1; + } + node->mr = ibv_reg_mr(node->pd, node->mem, + message_size + sizeof(struct ibv_grh), + IBV_ACCESS_LOCAL_WRITE); + if (!node->mr) { + printf("failed to reg MR\n"); + goto err; + } + return 0; +err: + free(node->mem); + return -1; +} + +static int verify_test_params(struct cmatest_node *node) +{ + struct 
ibv_port_attr port_attr; + int ret; + + ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num, + &port_attr); + if (ret) + return ret; + + if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) { + printf("mckey: message_size %d is larger than active mtu %d\n", + message_size, 1 << (port_attr.active_mtu + 7)); + return -EINVAL; + } + + return 0; +} + +static int init_node(struct cmatest_node *node) +{ + struct ibv_qp_init_attr init_qp_attr; + int cqe, ret; + + node->pd = ibv_alloc_pd(node->cma_id->verbs); + if (!node->pd) { + ret = -ENOMEM; + printf("mckey: unable to allocate PD\n"); + goto out; + } + + cqe = message_count ? message_count * 2 : 2; + node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0); + if (!node->cq) { + ret = -ENOMEM; + printf("mckey: unable to create CQ\n"); + goto out; + } + + memset(&init_qp_attr, 0, sizeof init_qp_attr); + init_qp_attr.cap.max_send_wr = message_count ? message_count : 1; + init_qp_attr.cap.max_recv_wr = message_count ? 
message_count : 1; + init_qp_attr.cap.max_send_sge = 1; + init_qp_attr.cap.max_recv_sge = 1; + init_qp_attr.qp_context = node; + init_qp_attr.sq_sig_all = 0; + init_qp_attr.qp_type = IBV_QPT_UD; + init_qp_attr.send_cq = node->cq; + init_qp_attr.recv_cq = node->cq; + ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr); + if (ret) { + printf("mckey: unable to create QP: %d\n", ret); + goto out; + } + + ret = create_message(node); + if (ret) { + printf("mckey: failed to create messages: %d\n", ret); + goto out; + } +out: + return ret; +} + +static int post_recvs(struct cmatest_node *node) +{ + struct ibv_recv_wr recv_wr, *recv_failure; + struct ibv_sge sge; + int i, ret = 0; + + if (!message_count) + return 0; + + recv_wr.next = NULL; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; + recv_wr.wr_id = (uintptr_t) node; + + sge.length = message_size + sizeof(struct ibv_grh); + sge.lkey = node->mr->lkey; + sge.addr = (uintptr_t) node->mem; + + for (i = 0; i < message_count && !ret; i++ ) { + ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure); + if (ret) { + printf("failed to post receives: %d\n", ret); + break; + } + } + return ret; +} + +static int post_sends(struct cmatest_node *node, int signal_flag) +{ + struct ibv_send_wr send_wr, *bad_send_wr; + struct ibv_sge sge; + int i, ret = 0; + + if (!node->connected || !message_count) + return 0; + + send_wr.next = NULL; + send_wr.sg_list = &sge; + send_wr.num_sge = 1; + send_wr.opcode = IBV_WR_SEND_WITH_IMM; + send_wr.send_flags = signal_flag; + send_wr.wr_id = (unsigned long)node; + send_wr.imm_data = htonl(node->cma_id->qp->qp_num); + + send_wr.wr.ud.ah = node->ah; + send_wr.wr.ud.remote_qpn = node->remote_qpn; + send_wr.wr.ud.remote_qkey = node->remote_qkey; + + sge.length = message_size; + sge.lkey = node->mr->lkey; + sge.addr = (uintptr_t) node->mem; + + for (i = 0; i < message_count && !ret; i++) { + ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr); + if (ret) + printf("failed to post 
sends: %d\n", ret); + } + return ret; +} + +static void connect_error(void) +{ + test.connects_left--; +} + +static int addr_handler(struct cmatest_node *node) +{ + int ret; + + ret = verify_test_params(node); + if (ret) + goto err; + + ret = init_node(node); + if (ret) + goto err; + + if (!is_sender) { + ret = post_recvs(node); + if (ret) + goto err; + } + + ret = rdma_join_multicast(node->cma_id, test.dst_addr, node); + if (ret) { + printf("mckey: failure joining: %d\n", ret); + goto err; + } + return 0; +err: + connect_error(); + return ret; +} + +static int join_handler(struct cmatest_node *node, + struct rdma_ud_param *param) +{ + char buf[40]; + + inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40); + printf("mckey: joined dgid: %s\n", buf); + + node->remote_qpn = param->qp_num; + node->remote_qkey = param->qkey; + node->ah = ibv_create_ah(node->pd, ¶m->ah_attr); + if (!node->ah) { + printf("mckey: failure creating address handle\n"); + goto err; + } + + node->connected = 1; + test.connects_left--; + return 0; +err: + connect_error(); + return -1; +} + +static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) +{ + int ret = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + ret = addr_handler(cma_id->context); + break; + case RDMA_CM_EVENT_MULTICAST_JOIN: + ret = join_handler(cma_id->context, &event->param.ud); + break; + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_MULTICAST_ERROR: + printf("mckey: event: %s, error: %d\n", + rdma_event_str(event->event), event->status); + connect_error(); + ret = event->status; + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* Cleanup will occur after test completes. 
*/ + break; + default: + break; + } + return ret; +} + +static void destroy_node(struct cmatest_node *node) +{ + if (!node->cma_id) + return; + + if (node->ah) + ibv_destroy_ah(node->ah); + + if (node->cma_id->qp) + rdma_destroy_qp(node->cma_id); + + if (node->cq) + ibv_destroy_cq(node->cq); + + if (node->mem) { + ibv_dereg_mr(node->mr); + free(node->mem); + } + + if (node->pd) + ibv_dealloc_pd(node->pd); + + /* Destroy the RDMA ID after all device resources */ + rdma_destroy_id(node->cma_id); +} + +static int alloc_nodes(void) +{ + int ret, i; + + test.nodes = malloc(sizeof *test.nodes * connections); + if (!test.nodes) { + printf("mckey: unable to allocate memory for test nodes\n"); + return -ENOMEM; + } + memset(test.nodes, 0, sizeof *test.nodes * connections); + + for (i = 0; i < connections; i++) { + test.nodes[i].id = i; + ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, + &test.nodes[i], port_space); + if (ret) + goto err; + } + return 0; +err: + while (--i >= 0) + rdma_destroy_id(test.nodes[i].cma_id); + free(test.nodes); + return ret; +} + +static void destroy_nodes(void) +{ + int i; + + for (i = 0; i < connections; i++) + destroy_node(&test.nodes[i]); + free(test.nodes); +} + +static int poll_cqs(void) +{ + struct ibv_wc wc[8]; + int done, i, ret; + + for (i = 0; i < connections; i++) { + if (!test.nodes[i].connected) + continue; + + for (done = 0; done < message_count; done += ret) { + ret = ibv_poll_cq(test.nodes[i].cq, 8, wc); + if (ret < 0) { + printf("mckey: failed polling CQ: %d\n", ret); + return ret; + } + } + } + return 0; +} + +static int connect_events(void) +{ + struct rdma_cm_event *event; + int ret = 0; + + while (test.connects_left && !ret) { + ret = rdma_get_cm_event(test.channel, &event); + if (!ret) { + ret = cma_handler(event->id, event); + rdma_ack_cm_event(event); + } + } + return ret; +} + +static int get_addr(char *dst, struct sockaddr *addr) +{ + struct addrinfo *res; + int ret; + + ret = getaddrinfo(dst, NULL, NULL, 
&res); + if (ret) { + printf("getaddrinfo failed - invalid hostname or IP address\n"); + return ret; + } + + memcpy(addr, res->ai_addr, res->ai_addrlen); + freeaddrinfo(res); + return ret; +} + +static int run(void) +{ + int i, ret; + + printf("mckey: starting %s\n", is_sender ? "client" : "server"); + if (src_addr) { + ret = get_addr(src_addr, (struct sockaddr *) &test.src_in); + if (ret) + return ret; + } + + ret = get_addr(dst_addr, (struct sockaddr *) &test.dst_in); + if (ret) + return ret; + + printf("mckey: joining\n"); + for (i = 0; i < connections; i++) { + if (src_addr) { + ret = rdma_bind_addr(test.nodes[i].cma_id, + test.src_addr); + if (ret) { + printf("mckey: addr bind failure: %d\n", ret); + connect_error(); + return ret; + } + } + + if (unmapped_addr) + ret = addr_handler(&test.nodes[i]); + else + ret = rdma_resolve_addr(test.nodes[i].cma_id, + test.src_addr, test.dst_addr, + 2000); + if (ret) { + printf("mckey: resolve addr failure: %d\n", ret); + connect_error(); + return ret; + } + } + + ret = connect_events(); + if (ret) + goto out; + + /* + * Pause to give SM chance to configure switches. We don't want to + * handle reliability issue in this simple test program. 
+ */ + sleep(3); + + if (message_count) { + if (is_sender) { + printf("initiating data transfers\n"); + for (i = 0; i < connections; i++) { + ret = post_sends(&test.nodes[i], 0); + if (ret) + goto out; + } + } else { + printf("receiving data transfers\n"); + ret = poll_cqs(); + if (ret) + goto out; + } + printf("data transfers complete\n"); + } +out: + for (i = 0; i < connections; i++) { + ret = rdma_leave_multicast(test.nodes[i].cma_id, + test.dst_addr); + if (ret) + printf("mckey: failure leaving: %d\n", ret); + } + return ret; +} + +int main(int argc, char **argv) +{ + int op, ret; + + + while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1) { + switch (op) { + case 'm': + dst_addr = optarg; + break; + case 'M': + unmapped_addr = 1; + dst_addr = optarg; + break; + case 's': + is_sender = 1; + break; + case 'b': + src_addr = optarg; + test.src_addr = (struct sockaddr *) &test.src_in; + break; + case 'c': + connections = atoi(optarg); + break; + case 'C': + message_count = atoi(optarg); + break; + case 'S': + message_size = atoi(optarg); + break; + case 'p': + port_space = strtol(optarg, NULL, 0); + break; + default: + printf("usage: %s\n", argv[0]); + printf("\t-m multicast_address\n"); + printf("\t[-M unmapped_multicast_address]\n" + "\t replaces -m and requires -b\n"); + printf("\t[-s(ender)]\n"); + printf("\t[-b bind_address]\n"); + printf("\t[-c connections]\n"); + printf("\t[-C message_count]\n"); + printf("\t[-S message_size]\n"); + printf("\t[-p port_space - %#x for UDP (default), " + "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB); + exit(1); + } + } + + test.dst_addr = (struct sockaddr *) &test.dst_in; + test.connects_left = connections; + + test.channel = rdma_create_event_channel(); + if (!test.channel) { + printf("failed to create event channel\n"); + exit(1); + } + + if (alloc_nodes()) + exit(1); + + ret = run(); + + printf("test complete\n"); + destroy_nodes(); + rdma_destroy_event_channel(test.channel); + + printf("return status %d\n", ret); + 
return ret; +} diff --git a/ulp/librdmacm/examples/rping/rping.c b/ulp/librdmacm/examples/rping/rping.c new file mode 100644 index 00000000..1e264feb --- /dev/null +++ b/ulp/librdmacm/examples/rping/rping.c @@ -0,0 +1,1122 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2009 Intel Corp. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "..\..\..\..\etc\user\getopt.c" +#include + +#include + +static int debug = 0; +#define DEBUG_LOG if (debug) printf + +/* + * rping "ping/pong" loop: + * client sends source rkey/addr/len + * server receives source rkey/add/len + * server rdma reads "ping" data from source + * server sends "go ahead" on rdma read completion + * client sends sink rkey/addr/len + * server receives sink rkey/addr/len + * server rdma writes "pong" data to sink + * server sends "go ahead" on rdma write completion + * + */ + +/* + * These states are used to signal events between the completion handler + * and the main client or server thread. + * + * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, + * and RDMA_WRITE_COMPLETE for each ping. + */ +enum test_state { + IDLE = 1, + CONNECT_REQUEST, + ADDR_RESOLVED, + ROUTE_RESOLVED, + CONNECTED, + RDMA_READ_ADV, + RDMA_READ_COMPLETE, + RDMA_WRITE_ADV, + RDMA_WRITE_COMPLETE, + RDMA_ERROR +}; + +struct rping_rdma_info { + uint64_t buf; + uint32_t rkey; + uint32_t size; +}; + +#define RPING_SQ_DEPTH 16 + +#define RPING_MSG_FMT "rdma-ping-%d: " +#define RPING_MIN_BUFSIZE 16 + +/* + * Control block struct. 
+ */ +struct rping_cb { + int server; /* 0 iff client */ + pthread_t cqthread; + struct ibv_comp_channel *channel; + struct ibv_cq *cq; + struct ibv_pd *pd; + struct ibv_qp *qp; + + struct ibv_recv_wr rq_wr; /* recv work request record */ + struct ibv_sge recv_sgl; /* recv single SGE */ + struct rping_rdma_info recv_buf;/* malloc'd buffer */ + struct ibv_mr *recv_mr; /* MR associated with this buffer */ + + struct ibv_send_wr sq_wr; /* send work request record */ + struct ibv_sge send_sgl; + struct rping_rdma_info send_buf;/* single send buf */ + struct ibv_mr *send_mr; + + struct ibv_send_wr rdma_sq_wr; /* rdma work request record */ + struct ibv_sge rdma_sgl; /* rdma single SGE */ + char *rdma_buf; /* used as rdma sink */ + struct ibv_mr *rdma_mr; + + uint32_t remote_rkey; /* remote guys RKEY */ + uint64_t remote_addr; /* remote guys TO */ + uint32_t remote_len; /* remote guys LEN */ + + char *start_buf; /* rdma read src */ + struct ibv_mr *start_mr; + + enum test_state state; /* used for cond/signalling */ +// sem_t sem; + + struct sockaddr_in sin; + uint16_t port; /* dst port in NBO */ + int verbose; /* verbose logging */ + int count; /* ping count */ + int size; /* ping data size */ + int validate; /* validate ping data */ + + /* CM stuff */ +// pthread_t cmthread; + struct rdma_event_channel *cm_channel; + struct rdma_cm_id *cm_id; /* connection on client side,*/ + /* listener on service side. */ + struct rdma_cm_id *child_cm_id; /* connection on server side */ +}; + +struct rping_cb *cb; +static void *cm_thread(void *arg); +static void *cq_thread(void *arg); + +static int rping_cma_event_handler(struct rdma_cm_id *cma_id, + struct rdma_cm_event *event) +{ + int ret = 0; + struct rping_cb *cb = cma_id->context; + + DEBUG_LOG("cma_event type %s cma_id %p (%s)\n", + rdma_event_str(event->event), cma_id, + (cma_id == cb->cm_id) ? 
"parent" : "child"); + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + cb->state = ADDR_RESOLVED; + ret = rdma_resolve_route(cma_id, 2000); + if (ret) { + cb->state = RDMA_ERROR; + fprintf(stderr, "rdma_resolve_route error %d\n", ret); +// sem_post(&cb->sem); + } + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + cb->state = ROUTE_RESOLVED; +// sem_post(&cb->sem); + break; + + case RDMA_CM_EVENT_CONNECT_REQUEST: + cb->state = CONNECT_REQUEST; + cb->child_cm_id = cma_id; + DEBUG_LOG("child cma %p\n", cb->child_cm_id); +// sem_post(&cb->sem); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + DEBUG_LOG("ESTABLISHED\n"); + + /* + * Server will wake up when first RECV completes. + */ + if (!cb->server) { + cb->state = CONNECTED; + } +// sem_post(&cb->sem); + break; + + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + fprintf(stderr, "cma event %s, error %d\n", + rdma_event_str(event->event), event->status); +// sem_post(&cb->sem); + ret = -1; + break; + + case RDMA_CM_EVENT_DISCONNECTED: + fprintf(stderr, "%s DISCONNECT EVENT...\n", + cb->server ? 
"server" : "client"); +// sem_post(&cb->sem); + break; + + case RDMA_CM_EVENT_DEVICE_REMOVAL: + fprintf(stderr, "cma detected device removal!!!!\n"); + ret = -1; + break; + + default: + fprintf(stderr, "unhandled event: %s, ignoring\n", + rdma_event_str(event->event)); + break; + } + + return ret; +} + +static int server_recv(struct rping_cb *cb, struct ibv_wc *wc) +{ + if (wc->byte_len != sizeof(cb->recv_buf)) { + fprintf(stderr, "Received bogus data, size %d\n", wc->byte_len); + return -1; + } + + cb->remote_rkey = ntohl(cb->recv_buf.rkey); + cb->remote_addr = ntohll(cb->recv_buf.buf); + cb->remote_len = ntohl(cb->recv_buf.size); + DEBUG_LOG("Received rkey %x addr %" PRIx64 " len %d from peer\n", + cb->remote_rkey, cb->remote_addr, cb->remote_len); + + if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) + cb->state = RDMA_READ_ADV; + else + cb->state = RDMA_WRITE_ADV; + + return 0; +} + +static int client_recv(struct rping_cb *cb, struct ibv_wc *wc) +{ + if (wc->byte_len != sizeof(cb->recv_buf)) { + fprintf(stderr, "Received bogus data, size %d\n", wc->byte_len); + return -1; + } + + if (cb->state == RDMA_READ_ADV) + cb->state = RDMA_WRITE_ADV; + else + cb->state = RDMA_WRITE_COMPLETE; + + return 0; +} + +static int rping_cq_event_handler(struct rping_cb *cb) +{ + struct ibv_wc wc; + struct ibv_recv_wr *bad_wr; + int ret; + + while ((ret = ibv_poll_cq(cb->cq, 1, &wc)) == 1) { + ret = 0; + + if (wc.status) { + fprintf(stderr, "cq completion failed status %d\n", + wc.status); + if (wc.status != IBV_WC_WR_FLUSH_ERR) + ret = -1; + goto error; + } + + switch (wc.opcode) { + case IBV_WC_SEND: + DEBUG_LOG("send completion\n"); + break; + + case IBV_WC_RDMA_WRITE: + DEBUG_LOG("rdma write completion\n"); + cb->state = RDMA_WRITE_COMPLETE; +// sem_post(&cb->sem); + break; + + case IBV_WC_RDMA_READ: + DEBUG_LOG("rdma read completion\n"); + cb->state = RDMA_READ_COMPLETE; +// sem_post(&cb->sem); + break; + + case IBV_WC_RECV: + DEBUG_LOG("recv completion\n"); + 
ret = cb->server ? server_recv(cb, &wc) : + client_recv(cb, &wc); + if (ret) { + fprintf(stderr, "recv wc error: %d\n", ret); + goto error; + } + + ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post recv error: %d\n", ret); + goto error; + } +// sem_post(&cb->sem); + break; + + default: + DEBUG_LOG("unknown!!!!! completion\n"); + ret = -1; + goto error; + } + } + if (ret) { + fprintf(stderr, "poll error %d\n", ret); + goto error; + } + return 0; + +error: + cb->state = RDMA_ERROR; +// sem_post(&cb->sem); + return ret; +} + +static int rping_accept(struct rping_cb *cb) +{ + struct rdma_conn_param conn_param; + int ret; + + DEBUG_LOG("accepting client connection request\n"); + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 1; + conn_param.initiator_depth = 1; + + ret = rdma_accept(cb->child_cm_id, &conn_param); + if (ret) { + fprintf(stderr, "rdma_accept error: %d\n", ret); + return ret; + } + + cm_thread(cb); +// sem_wait(&cb->sem); + if (cb->state == RDMA_ERROR) { + fprintf(stderr, "wait for CONNECTED state %d\n", cb->state); + return -1; + } + return 0; +} + +static void rping_setup_wr(struct rping_cb *cb) +{ + cb->recv_sgl.addr = (uint64_t) (unsigned long) &cb->recv_buf; + cb->recv_sgl.length = sizeof cb->recv_buf; + cb->recv_sgl.lkey = cb->recv_mr->lkey; + cb->rq_wr.sg_list = &cb->recv_sgl; + cb->rq_wr.num_sge = 1; + + cb->send_sgl.addr = (uint64_t) (unsigned long) &cb->send_buf; + cb->send_sgl.length = sizeof cb->send_buf; + cb->send_sgl.lkey = cb->send_mr->lkey; + + cb->sq_wr.opcode = IBV_WR_SEND; + cb->sq_wr.send_flags = IBV_SEND_SIGNALED; + cb->sq_wr.sg_list = &cb->send_sgl; + cb->sq_wr.num_sge = 1; + + cb->rdma_sgl.addr = (uint64_t) (unsigned long) cb->rdma_buf; + cb->rdma_sgl.lkey = cb->rdma_mr->lkey; + cb->rdma_sq_wr.send_flags = IBV_SEND_SIGNALED; + cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; + cb->rdma_sq_wr.num_sge = 1; +} + +static int rping_setup_buffers(struct rping_cb *cb) +{ + int 
ret; + + DEBUG_LOG("rping_setup_buffers called on cb %p\n", cb); + + cb->recv_mr = ibv_reg_mr(cb->pd, &cb->recv_buf, sizeof cb->recv_buf, + IBV_ACCESS_LOCAL_WRITE); + if (!cb->recv_mr) { + fprintf(stderr, "recv_buf reg_mr failed\n"); + return errno; + } + + cb->send_mr = ibv_reg_mr(cb->pd, &cb->send_buf, sizeof cb->send_buf, 0); + if (!cb->send_mr) { + fprintf(stderr, "send_buf reg_mr failed\n"); + ret = errno; + goto err1; + } + + cb->rdma_buf = malloc(cb->size); + if (!cb->rdma_buf) { + fprintf(stderr, "rdma_buf malloc failed\n"); + ret = -ENOMEM; + goto err2; + } + + cb->rdma_mr = ibv_reg_mr(cb->pd, cb->rdma_buf, cb->size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (!cb->rdma_mr) { + fprintf(stderr, "rdma_buf reg_mr failed\n"); + ret = errno; + goto err3; + } + + if (!cb->server) { + cb->start_buf = malloc(cb->size); + if (!cb->start_buf) { + fprintf(stderr, "start_buf malloc failed\n"); + ret = -ENOMEM; + goto err4; + } + + cb->start_mr = ibv_reg_mr(cb->pd, cb->start_buf, cb->size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (!cb->start_mr) { + fprintf(stderr, "start_buf reg_mr failed\n"); + ret = errno; + goto err5; + } + } + + rping_setup_wr(cb); + DEBUG_LOG("allocated & registered buffers...\n"); + return 0; + +err5: + free(cb->start_buf); +err4: + ibv_dereg_mr(cb->rdma_mr); +err3: + free(cb->rdma_buf); +err2: + ibv_dereg_mr(cb->send_mr); +err1: + ibv_dereg_mr(cb->recv_mr); + return ret; +} + +static void rping_free_buffers(struct rping_cb *cb) +{ + DEBUG_LOG("rping_free_buffers called on cb %p\n", cb); + ibv_dereg_mr(cb->recv_mr); + ibv_dereg_mr(cb->send_mr); + ibv_dereg_mr(cb->rdma_mr); + free(cb->rdma_buf); + if (!cb->server) { + ibv_dereg_mr(cb->start_mr); + free(cb->start_buf); + } +} + +static int rping_create_qp(struct rping_cb *cb) +{ + struct ibv_qp_init_attr init_attr; + int ret; + + memset(&init_attr, 0, sizeof(init_attr)); + init_attr.cap.max_send_wr = 
RPING_SQ_DEPTH; + init_attr.cap.max_recv_wr = 2; + init_attr.cap.max_recv_sge = 1; + init_attr.cap.max_send_sge = 1; + init_attr.qp_type = IBV_QPT_RC; + init_attr.send_cq = cb->cq; + init_attr.recv_cq = cb->cq; + + if (cb->server) { + ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); + if (!ret) + cb->qp = cb->child_cm_id->qp; + } else { + ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); + if (!ret) + cb->qp = cb->cm_id->qp; + } + + return ret; +} + +static void rping_free_qp(struct rping_cb *cb) +{ + ibv_destroy_qp(cb->qp); + ibv_destroy_cq(cb->cq); + ibv_destroy_comp_channel(cb->channel); + ibv_dealloc_pd(cb->pd); +} + +static int rping_setup_qp(struct rping_cb *cb, struct rdma_cm_id *cm_id) +{ + int ret; + + cb->pd = ibv_alloc_pd(cm_id->verbs); + if (!cb->pd) { + fprintf(stderr, "ibv_alloc_pd failed\n"); + return errno; + } + DEBUG_LOG("created pd %p\n", cb->pd); + + cb->channel = ibv_create_comp_channel(cm_id->verbs); + if (!cb->channel) { + fprintf(stderr, "ibv_create_comp_channel failed\n"); + ret = errno; + goto err1; + } + DEBUG_LOG("created channel %p\n", cb->channel); + + cb->cq = ibv_create_cq(cm_id->verbs, RPING_SQ_DEPTH * 2, cb, + cb->channel, 0); + if (!cb->cq) { + fprintf(stderr, "ibv_create_cq failed\n"); + ret = errno; + goto err2; + } + DEBUG_LOG("created cq %p\n", cb->cq); + + ret = ibv_req_notify_cq(cb->cq, 0); + if (ret) { + fprintf(stderr, "ibv_create_cq failed\n"); + ret = errno; + goto err3; + } + + ret = rping_create_qp(cb); + if (ret) { + fprintf(stderr, "rping_create_qp failed: %d\n", ret); + goto err3; + } + DEBUG_LOG("created qp %p\n", cb->qp); + return 0; + +err3: + ibv_destroy_cq(cb->cq); +err2: + ibv_destroy_comp_channel(cb->channel); +err1: + ibv_dealloc_pd(cb->pd); + return ret; +} + +static void cm_thread(void *arg) +{ + struct rping_cb *cb = arg; + struct rdma_cm_event *event; + int ret; + +// while (1) { + ret = rdma_get_cm_event(cb->cm_channel, &event); + if (ret) { + fprintf(stderr, "rdma_get_cm_event err %d\n", 
ret); + return; +// exit(ret); + } + ret = rping_cma_event_handler(event->id, event); + rdma_ack_cm_event(event); +// if (ret) +// exit(ret); +// } +} + +static void cq_thread(void *arg) +{ + struct rping_cb *cb = arg; + struct ibv_cq *ev_cq; + void *ev_ctx; + int ret; + +// DEBUG_LOG("cq_thread started.\n"); + +// while (1) { +// pthread_testcancel(); + + ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx); + if (ret) { + fprintf(stderr, "Failed to get cq event!\n"); + return; +// pthread_exit(NULL); + } + if (ev_cq != cb->cq) { + fprintf(stderr, "Unknown CQ!\n"); + return; +// pthread_exit(NULL); + } + ret = ibv_req_notify_cq(cb->cq, 0); + if (ret) { + fprintf(stderr, "Failed to set notify!\n"); + return; +// pthread_exit(NULL); + } + ret = rping_cq_event_handler(cb); + ibv_ack_cq_events(cb->cq, 1); +// pthread_exit(NULL); +// } +} + +static void rping_format_send(struct rping_cb *cb, char *buf, struct ibv_mr *mr) +{ + struct rping_rdma_info *info = &cb->send_buf; + + info->buf = htonll((uint64_t) (unsigned long) buf); + info->rkey = htonl(mr->rkey); + info->size = htonl(cb->size); + + DEBUG_LOG("RDMA addr %" PRIx64" rkey %x len %d\n", + ntohll(info->buf), ntohl(info->rkey), ntohl(info->size)); +} + +static int rping_test_server(struct rping_cb *cb) +{ + struct ibv_send_wr *bad_wr; + int ret; + + while (1) { + /* Wait for client's Start STAG/TO/Len */ + cq_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != RDMA_READ_ADV) { + fprintf(stderr, "wait for RDMA_READ_ADV state %d\n", + cb->state); + ret = -1; + break; + } + + DEBUG_LOG("server received sink adv\n"); + + /* Issue RDMA Read. 
*/ + cb->rdma_sq_wr.opcode = IBV_WR_RDMA_READ; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->remote_len; + + ret = ibv_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post send error %d\n", ret); + break; + } + DEBUG_LOG("server posted rdma read req \n"); + + /* Wait for read completion */ + cq_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != RDMA_READ_COMPLETE) { + fprintf(stderr, "wait for RDMA_READ_COMPLETE state %d\n", + cb->state); + ret = -1; + break; + } + DEBUG_LOG("server received read complete\n"); + + /* Display data in recv buf */ + if (cb->verbose) + printf("server ping data: %s\n", cb->rdma_buf); + + /* Tell client to continue */ + ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post send error %d\n", ret); + break; + } + DEBUG_LOG("server posted go ahead\n"); + + /* Wait for client's RDMA STAG/TO/Len */ + cq_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != RDMA_WRITE_ADV) { + fprintf(stderr, "wait for RDMA_WRITE_ADV state %d\n", + cb->state); + ret = -1; + break; + } + DEBUG_LOG("server received sink adv\n"); + + /* RDMA Write echo data */ + cb->rdma_sq_wr.opcode = IBV_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1; + DEBUG_LOG("rdma write from lkey %x laddr %" PRIx64 " len %d\n", + cb->rdma_sq_wr.sg_list->lkey, + cb->rdma_sq_wr.sg_list->addr, + cb->rdma_sq_wr.sg_list->length); + + ret = ibv_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post send error %d\n", ret); + break; + } + + /* Wait for completion */ + cq_thread(cb); +// ret = sem_wait(&cb->sem); + if (cb->state != RDMA_WRITE_COMPLETE) { + fprintf(stderr, "wait for RDMA_WRITE_COMPLETE state %d\n", + cb->state); + ret = -1; + break; + } + DEBUG_LOG("server rdma 
write complete \n"); + + /* Tell client to begin again */ + ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post send error %d\n", ret); + break; + } + DEBUG_LOG("server posted go ahead\n"); + } + + return ret; +} + +static int rping_bind_server(struct rping_cb *cb) +{ + int ret; + + cb->sin.sin_port = cb->port; + ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &cb->sin); + if (ret) { + fprintf(stderr, "rdma_bind_addr error %d\n", ret); + return ret; + } + DEBUG_LOG("rdma_bind_addr successful\n"); + + DEBUG_LOG("rdma_listen\n"); + ret = rdma_listen(cb->cm_id, 3); + if (ret) { + fprintf(stderr, "rdma_listen failed: %d\n", ret); + return ret; + } + + return 0; +} + +static struct rping_cb *clone_cb(struct rping_cb *listening_cb) +{ + struct rping_cb *cb = malloc(sizeof *cb); + if (!cb) + return NULL; + *cb = *listening_cb; + cb->child_cm_id->context = cb; + return cb; +} + +static void free_cb(struct rping_cb *cb) +{ + free(cb); +} + +static int rping_run_server(struct rping_cb *cb) +{ + struct ibv_recv_wr *bad_wr; + int ret; + + ret = rping_bind_server(cb); + if (ret) + return ret; + + cm_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != CONNECT_REQUEST) { + fprintf(stderr, "wait for CONNECT_REQUEST state %d\n", + cb->state); + return -1; + } + + ret = rping_setup_qp(cb, cb->child_cm_id); + if (ret) { + fprintf(stderr, "setup_qp failed: %d\n", ret); + return ret; + } + + ret = rping_setup_buffers(cb); + if (ret) { + fprintf(stderr, "rping_setup_buffers failed: %d\n", ret); + goto err1; + } + + ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "ibv_post_recv failed: %d\n", ret); + goto err2; + } + +// pthread_create(&cb->cqthread, NULL, cq_thread, cb); + + ret = rping_accept(cb); + if (ret) { + fprintf(stderr, "connect error %d\n", ret); + goto err2; + } + + rping_test_server(cb); + rdma_disconnect(cb->child_cm_id); + rdma_destroy_id(cb->child_cm_id); +err2: + rping_free_buffers(cb); +err1: + 
rping_free_qp(cb); + + return ret; +} + +static int rping_test_client(struct rping_cb *cb) +{ + int ping, start, cc, i, ret = 0; + struct ibv_send_wr *bad_wr; + unsigned char c; + + start = 65; + for (ping = 0; !cb->count || ping < cb->count; ping++) { + cb->state = RDMA_READ_ADV; + + /* Put some ascii text in the buffer. */ + cc = sprintf(cb->start_buf, RPING_MSG_FMT, ping); + for (i = cc, c = start; i < cb->size; i++) { + cb->start_buf[i] = c; + c++; + if (c > 122) + c = 65; + } + start++; + if (start > 122) + start = 65; + cb->start_buf[cb->size - 1] = 0; + + rping_format_send(cb, cb->start_buf, cb->start_mr); + ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post send error %d\n", ret); + break; + } + + /* Wait for server to ACK */ + cq_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != RDMA_WRITE_ADV) { + fprintf(stderr, "wait for RDMA_WRITE_ADV state %d\n", + cb->state); + ret = -1; + break; + } + + rping_format_send(cb, cb->rdma_buf, cb->rdma_mr); + ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + fprintf(stderr, "post send error %d\n", ret); + break; + } + + /* Wait for the server to say the RDMA Write is complete. 
*/ + cq_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != RDMA_WRITE_COMPLETE) { + fprintf(stderr, "wait for RDMA_WRITE_COMPLETE state %d\n", + cb->state); + ret = -1; + break; + } + + if (cb->validate) + if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { + fprintf(stderr, "data mismatch!\n"); + ret = -1; + break; + } + + if (cb->verbose) + printf("ping data: %s\n", cb->rdma_buf); + } + + return ret; +} + +static int rping_connect_client(struct rping_cb *cb) +{ + struct rdma_conn_param conn_param; + int ret; + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 1; + conn_param.initiator_depth = 1; + conn_param.retry_count = 10; + + ret = rdma_connect(cb->cm_id, &conn_param); + if (ret) { + fprintf(stderr, "rdma_connect error %d\n", ret); + return ret; + } + + cm_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != CONNECTED) { + fprintf(stderr, "wait for CONNECTED state %d\n", cb->state); + return -1; + } + + DEBUG_LOG("rmda_connect successful\n"); + return 0; +} + +static int rping_bind_client(struct rping_cb *cb) +{ + int ret; + + cb->sin.sin_port = cb->port; + ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &cb->sin, 2000); + if (ret) { + fprintf(stderr, "rdma_resolve_addr error %d\n", ret); + return ret; + } + + cm_thread(cb); +// sem_wait(&cb->sem); + if (cb->state != ROUTE_RESOLVED) { + fprintf(stderr, "waiting for addr/route resolution state %d\n", + cb->state); + return -1; + } + + DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n"); + return 0; +} + +static int rping_run_client(struct rping_cb *cb) +{ + struct ibv_recv_wr *bad_wr; + int ret; + + ret = rping_bind_client(cb); + if (ret) + return ret; + + ret = rping_setup_qp(cb, cb->cm_id); + if (ret) { + fprintf(stderr, "setup_qp failed: %d\n", ret); + return ret; + } + + ret = rping_setup_buffers(cb); + if (ret) { + fprintf(stderr, "rping_setup_buffers failed: %d\n", ret); + goto err1; + } + + ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr); + 
if (ret) { + fprintf(stderr, "ibv_post_recv failed: %d\n", ret); + goto err2; + } + +// pthread_create(&cb->cqthread, NULL, cq_thread, cb); + + ret = rping_connect_client(cb); + if (ret) { + fprintf(stderr, "connect error %d\n", ret); + goto err2; + } + + rping_test_client(cb); + rdma_disconnect(cb->cm_id); +err2: + rping_free_buffers(cb); +err1: + rping_free_qp(cb); + + return ret; +} + +static int get_addr(char *dst, struct sockaddr_in *addr) +{ + struct addrinfo *res; + int ret; + + ret = getaddrinfo(dst, NULL, NULL, &res); + if (ret) { + printf("getaddrinfo failed - invalid hostname or IP address\n"); + return ret; + } + + if (res->ai_family != PF_INET) { + ret = -1; + goto out; + } + + *addr = *(struct sockaddr_in *) res->ai_addr; +out: + freeaddrinfo(res); + return ret; +} + +static void usage() +{ + printf("rdma_rping -s [-vVd] [-S size] [-C count] [-a addr] [-p port]\n"); + printf("rdma_rping -c [-vVd] [-S size] [-C count] -a addr [-p port]\n"); + printf("\t-c\t\tclient side\n"); + printf("\t-s\t\tserver side\n"); + printf("\t-v\t\tdisplay ping data to stdout\n"); + printf("\t-V\t\tvalidate ping data\n"); + printf("\t-d\t\tdebug printfs\n"); + printf("\t-S size \tping data size\n"); + printf("\t-C count\tping count times\n"); + printf("\t-a addr\t\taddress\n"); + printf("\t-p port\t\tport\n"); +} + +int main(int argc, char *argv[]) +{ +// struct rping_cb *cb; + int op; + int ret = 0; + + cb = malloc(sizeof(*cb)); + if (!cb) + return -ENOMEM; + + memset(cb, 0, sizeof(*cb)); + cb->server = -1; + cb->state = IDLE; + cb->size = 64; + cb->sin.sin_family = PF_INET; + cb->port = htons(7174); +// sem_init(&cb->sem, 0, 0); + + opterr = 0; + while ((op=getopt(argc, argv, "a:Pp:C:S:t:scvVd")) != -1) { + switch (op) { + case 'a': + ret = get_addr(optarg, &cb->sin); + break; + case 'p': + cb->port = htons(atoi(optarg)); + DEBUG_LOG("port %d\n", (int) atoi(optarg)); + break; + case 's': + cb->server = 1; + DEBUG_LOG("server\n"); + break; + case 'c': + cb->server = 0; + 
DEBUG_LOG("client\n"); + break; + case 'S': + cb->size = atoi(optarg); + if (cb->size < RPING_MIN_BUFSIZE) { + fprintf(stderr, "Invalid size (minimum is %d) " RPING_MIN_BUFSIZE); + ret = EINVAL; + } else + DEBUG_LOG("size %d\n", (int) atoi(optarg)); + break; + case 'C': + cb->count = atoi(optarg); + if (cb->count < 0) { + fprintf(stderr, "Invalid count %d\n", cb->count); + ret = EINVAL; + } else + DEBUG_LOG("count %d\n", (int) cb->count); + break; + case 'v': + cb->verbose++; + DEBUG_LOG("verbose\n"); + break; + case 'V': + cb->validate++; + DEBUG_LOG("validate data\n"); + break; + case 'd': + debug++; + break; + default: + usage(); + ret = EINVAL; + goto out; + } + } + if (ret) + goto out; + + if (cb->server == -1) { + usage(); + ret = EINVAL; + goto out; + } + + cb->cm_channel = rdma_create_event_channel(); + if (!cb->cm_channel) { + ret = errno; + fprintf(stderr, "rdma_create_event_channel error %d\n", ret); + goto out; + } + + ret = rdma_create_id(cb->cm_channel, &cb->cm_id, cb, RDMA_PS_TCP); + if (ret) { + ret = errno; + fprintf(stderr, "rdma_create_id error %d\n", ret); + goto out2; + } + DEBUG_LOG("created cm_id %p\n", cb->cm_id); + +// pthread_create(&cb->cmthread, NULL, cm_thread, cb); + + if (cb->server) + ret = rping_run_server(cb); + else + ret = rping_run_client(cb); + + DEBUG_LOG("destroy cm_id %p\n", cb->cm_id); + rdma_destroy_id(cb->cm_id); +out2: + rdma_destroy_event_channel(cb->cm_channel); +out: + free(cb); + return ret; +} diff --git a/ulp/librdmacm/examples/udaddy/udaddy.c b/ulp/librdmacm/examples/udaddy/udaddy.c new file mode 100644 index 00000000..9e74a228 --- /dev/null +++ b/ulp/librdmacm/examples/udaddy/udaddy.c @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct cmatest_node { + int id; + struct rdma_cm_id *cma_id; + int connected; + struct ibv_pd *pd; + struct ibv_cq *cq; + struct ibv_mr *mr; + struct ibv_ah *ah; + uint32_t remote_qpn; + uint32_t remote_qkey; + void *mem; +}; + +struct cmatest { + struct rdma_event_channel *channel; + struct cmatest_node *nodes; + int conn_index; + int connects_left; + + struct sockaddr_in dst_in; + struct sockaddr *dst_addr; + struct sockaddr_in src_in; + struct sockaddr *src_addr; +}; + +static struct cmatest test; +static int connections = 1; +static int message_size = 100; +static int message_count = 10; +static uint16_t port = 7174; +static uint8_t set_tos = 0; +static uint8_t tos; +static char *dst_addr; +static char *src_addr; +static enum rdma_port_space port_space = RDMA_PS_UDP; + +static int create_message(struct cmatest_node *node) +{ + if (!message_size) + message_count = 0; + + if (!message_count) + return 0; + + node->mem = malloc(message_size + sizeof(struct ibv_grh)); + if (!node->mem) { + printf("failed message allocation\n"); + return -1; + } + node->mr = ibv_reg_mr(node->pd, node->mem, + message_size + sizeof(struct ibv_grh), + IBV_ACCESS_LOCAL_WRITE); + if (!node->mr) { + printf("failed to reg MR\n"); + goto err; + } + return 0; +err: + free(node->mem); + return -1; +} + +static int verify_test_params(struct cmatest_node *node) +{ + struct ibv_port_attr port_attr; + int ret; + + ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num, + &port_attr); + if (ret) + return ret; + + if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) { + printf("udaddy: message_size %d is larger than active mtu %d\n", + message_size, 1 << (port_attr.active_mtu + 7)); + return -EINVAL; + } + + return 0; +} + +static int init_node(struct cmatest_node *node) +{ + struct ibv_qp_init_attr init_qp_attr; + int cqe, ret; + + 
node->pd = ibv_alloc_pd(node->cma_id->verbs); + if (!node->pd) { + ret = -ENOMEM; + printf("udaddy: unable to allocate PD\n"); + goto out; + } + + cqe = message_count ? message_count * 2 : 2; + node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0); + if (!node->cq) { + ret = -ENOMEM; + printf("udaddy: unable to create CQ\n"); + goto out; + } + + memset(&init_qp_attr, 0, sizeof init_qp_attr); + init_qp_attr.cap.max_send_wr = message_count ? message_count : 1; + init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1; + init_qp_attr.cap.max_send_sge = 1; + init_qp_attr.cap.max_recv_sge = 1; + init_qp_attr.qp_context = node; + init_qp_attr.sq_sig_all = 0; + init_qp_attr.qp_type = IBV_QPT_UD; + init_qp_attr.send_cq = node->cq; + init_qp_attr.recv_cq = node->cq; + ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr); + if (ret) { + printf("udaddy: unable to create QP: %d\n", ret); + goto out; + } + + ret = create_message(node); + if (ret) { + printf("udaddy: failed to create messages: %d\n", ret); + goto out; + } +out: + return ret; +} + +static int post_recvs(struct cmatest_node *node) +{ + struct ibv_recv_wr recv_wr, *recv_failure; + struct ibv_sge sge; + int i, ret = 0; + + if (!message_count) + return 0; + + recv_wr.next = NULL; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; + recv_wr.wr_id = (uintptr_t) node; + + sge.length = message_size + sizeof(struct ibv_grh); + sge.lkey = node->mr->lkey; + sge.addr = (uintptr_t) node->mem; + + for (i = 0; i < message_count && !ret; i++ ) { + ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure); + if (ret) { + printf("failed to post receives: %d\n", ret); + break; + } + } + return ret; +} + +static int post_sends(struct cmatest_node *node, int signal_flag) +{ + struct ibv_send_wr send_wr, *bad_send_wr; + struct ibv_sge sge; + int i, ret = 0; + + if (!node->connected || !message_count) + return 0; + + send_wr.next = NULL; + send_wr.sg_list = &sge; + send_wr.num_sge = 1; + send_wr.opcode = 
IBV_WR_SEND_WITH_IMM; + send_wr.send_flags = signal_flag; + send_wr.wr_id = (unsigned long)node; + send_wr.imm_data = htonl(node->cma_id->qp->qp_num); + + send_wr.wr.ud.ah = node->ah; + send_wr.wr.ud.remote_qpn = node->remote_qpn; + send_wr.wr.ud.remote_qkey = node->remote_qkey; + + sge.length = message_size; + sge.lkey = node->mr->lkey; + sge.addr = (uintptr_t) node->mem; + + for (i = 0; i < message_count && !ret; i++) { + ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr); + if (ret) + printf("failed to post sends: %d\n", ret); + } + return ret; +} + +static void connect_error(void) +{ + test.connects_left--; +} + +static int addr_handler(struct cmatest_node *node) +{ + int ret; + + if (set_tos) { + ret = rdma_set_option(node->cma_id, RDMA_OPTION_ID, + RDMA_OPTION_ID_TOS, &tos, sizeof tos); + if (ret) + printf("udaddy: set TOS option failed: %d\n", ret); + } + + ret = rdma_resolve_route(node->cma_id, 2000); + if (ret) { + printf("udaddy: resolve route failed: %d\n", ret); + connect_error(); + } + return ret; +} + +static int route_handler(struct cmatest_node *node) +{ + struct rdma_conn_param conn_param; + int ret; + + ret = verify_test_params(node); + if (ret) + goto err; + + ret = init_node(node); + if (ret) + goto err; + + ret = post_recvs(node); + if (ret) + goto err; + + memset(&conn_param, 0, sizeof conn_param); + ret = rdma_connect(node->cma_id, &conn_param); + if (ret) { + printf("udaddy: failure connecting: %d\n", ret); + goto err; + } + return 0; +err: + connect_error(); + return ret; +} + +static int connect_handler(struct rdma_cm_id *cma_id) +{ + struct cmatest_node *node; + struct rdma_conn_param conn_param; + int ret; + + if (test.conn_index == connections) { + ret = -ENOMEM; + goto err1; + } + node = &test.nodes[test.conn_index++]; + + node->cma_id = cma_id; + cma_id->context = node; + + ret = verify_test_params(node); + if (ret) + goto err2; + + ret = init_node(node); + if (ret) + goto err2; + + ret = post_recvs(node); + if (ret) + goto 
err2; + + memset(&conn_param, 0, sizeof conn_param); + conn_param.qp_num = node->cma_id->qp->qp_num; + ret = rdma_accept(node->cma_id, &conn_param); + if (ret) { + printf("udaddy: failure accepting: %d\n", ret); + goto err2; + } + node->connected = 1; + test.connects_left--; + return 0; + +err2: + node->cma_id = NULL; + connect_error(); +err1: + printf("udaddy: failing connection request\n"); + rdma_reject(cma_id, NULL, 0); + return ret; +} + +static int resolved_handler(struct cmatest_node *node, + struct rdma_cm_event *event) +{ + node->remote_qpn = event->param.ud.qp_num; + node->remote_qkey = event->param.ud.qkey; + node->ah = ibv_create_ah(node->pd, &event->param.ud.ah_attr); + if (!node->ah) { + printf("udaddy: failure creating address handle\n"); + goto err; + } + + node->connected = 1; + test.connects_left--; + return 0; +err: + connect_error(); + return -1; +} + +static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) +{ + int ret = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + ret = addr_handler(cma_id->context); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + ret = route_handler(cma_id->context); + break; + case RDMA_CM_EVENT_CONNECT_REQUEST: + ret = connect_handler(cma_id); + break; + case RDMA_CM_EVENT_ESTABLISHED: + ret = resolved_handler(cma_id->context, event); + break; + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + printf("udaddy: event: %s, error: %d\n", + rdma_event_str(event->event), event->status); + connect_error(); + ret = event->status; + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* Cleanup will occur after test completes. 
*/ + break; + default: + break; + } + return ret; +} + +static void destroy_node(struct cmatest_node *node) +{ + if (!node->cma_id) + return; + + if (node->ah) + ibv_destroy_ah(node->ah); + + if (node->cma_id->qp) + rdma_destroy_qp(node->cma_id); + + if (node->cq) + ibv_destroy_cq(node->cq); + + if (node->mem) { + ibv_dereg_mr(node->mr); + free(node->mem); + } + + if (node->pd) + ibv_dealloc_pd(node->pd); + + /* Destroy the RDMA ID after all device resources */ + rdma_destroy_id(node->cma_id); +} + +static int alloc_nodes(void) +{ + int ret, i; + + test.nodes = malloc(sizeof *test.nodes * connections); + if (!test.nodes) { + printf("udaddy: unable to allocate memory for test nodes\n"); + return -ENOMEM; + } + memset(test.nodes, 0, sizeof *test.nodes * connections); + + for (i = 0; i < connections; i++) { + test.nodes[i].id = i; + if (dst_addr) { + ret = rdma_create_id(test.channel, + &test.nodes[i].cma_id, + &test.nodes[i], port_space); + if (ret) + goto err; + } + } + return 0; +err: + while (--i >= 0) + rdma_destroy_id(test.nodes[i].cma_id); + free(test.nodes); + return ret; +} + +static void destroy_nodes(void) +{ + int i; + + for (i = 0; i < connections; i++) + destroy_node(&test.nodes[i]); + free(test.nodes); +} + +static void create_reply_ah(struct cmatest_node *node, struct ibv_wc *wc) +{ + struct ibv_qp_attr attr; + struct ibv_qp_init_attr init_attr; + + node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem, + node->cma_id->port_num); + node->remote_qpn = ntohl(wc->imm_data); + + ibv_query_qp(node->cma_id->qp, &attr, IBV_QP_QKEY, &init_attr); + node->remote_qkey = attr.qkey; +} + +static int poll_cqs(void) +{ + struct ibv_wc wc[8]; + int done, i, ret; + + for (i = 0; i < connections; i++) { + if (!test.nodes[i].connected) + continue; + + for (done = 0; done < message_count; done += ret) { + ret = ibv_poll_cq(test.nodes[i].cq, 8, wc); + if (ret < 0) { + printf("udaddy: failed polling CQ: %d\n", ret); + return ret; + } + + if (ret && !test.nodes[i].ah) + 
create_reply_ah(&test.nodes[i], wc); + } + } + return 0; +} + +static int connect_events(void) +{ + struct rdma_cm_event *event; + int ret = 0; + + while (test.connects_left && !ret) { + ret = rdma_get_cm_event(test.channel, &event); + if (!ret) { + ret = cma_handler(event->id, event); + rdma_ack_cm_event(event); + } + } + return ret; +} + +static int get_addr(char *dst, struct sockaddr_in *addr) +{ + struct addrinfo *res; + int ret; + + ret = getaddrinfo(dst, NULL, NULL, &res); + if (ret) { + printf("getaddrinfo failed - invalid hostname or IP address\n"); + return ret; + } + + if (res->ai_family != PF_INET) { + ret = -1; + goto out; + } + + *addr = *(struct sockaddr_in *) res->ai_addr; +out: + freeaddrinfo(res); + return ret; +} + +static int run_server(void) +{ + struct rdma_cm_id *listen_id; + int i, ret; + + printf("udaddy: starting server\n"); + ret = rdma_create_id(test.channel, &listen_id, &test, port_space); + if (ret) { + printf("udaddy: listen request failed\n"); + return ret; + } + + if (src_addr) { + ret = get_addr(src_addr, &test.src_in); + if (ret) + goto out; + } else + test.src_in.sin_family = PF_INET; + + test.src_in.sin_port = port; + ret = rdma_bind_addr(listen_id, test.src_addr); + if (ret) { + printf("udaddy: bind address failed: %d\n", ret); + goto out; /* release listen_id on bind failure */ + } + + ret = rdma_listen(listen_id, 0); + if (ret) { + printf("udaddy: failure trying to listen: %d\n", ret); + goto out; + } + + connect_events(); + + if (message_count) { + printf("receiving data transfers\n"); + ret = poll_cqs(); + if (ret) + goto out; + + printf("sending replies\n"); + for (i = 0; i < connections; i++) { + ret = post_sends(&test.nodes[i], IBV_SEND_SIGNALED); + if (ret) + goto out; + } + + ret = poll_cqs(); + if (ret) + goto out; + printf("data transfers complete\n"); + } +out: + rdma_destroy_id(listen_id); + return ret; +} + +static int run_client(void) +{ + int i, ret; + + printf("udaddy: starting client\n"); + if (src_addr) { + ret = get_addr(src_addr, &test.src_in);
+ if (ret) + return ret; + } + + ret = get_addr(dst_addr, &test.dst_in); + if (ret) + return ret; + + test.dst_in.sin_port = port; + + printf("udaddy: connecting\n"); + for (i = 0; i < connections; i++) { + ret = rdma_resolve_addr(test.nodes[i].cma_id, + src_addr ? test.src_addr : NULL, + test.dst_addr, 2000); + if (ret) { + printf("udaddy: failure getting addr: %d\n", ret); + connect_error(); + return ret; + } + } + + ret = connect_events(); + if (ret) + goto out; + + if (message_count) { + printf("initiating data transfers\n"); + for (i = 0; i < connections; i++) { + ret = post_sends(&test.nodes[i], 0); + if (ret) + goto out; + } + printf("receiving data transfers\n"); + ret = poll_cqs(); + if (ret) + goto out; + + printf("data transfers complete\n"); + } +out: + return ret; +} + +int main(int argc, char **argv) +{ + int op, ret; + + while ((op = getopt(argc, argv, "s:b:c:C:S:t:p:")) != -1) { + switch (op) { + case 's': + dst_addr = optarg; + break; + case 'b': + src_addr = optarg; + break; + case 'c': + connections = atoi(optarg); + break; + case 'C': + message_count = atoi(optarg); + break; + case 'S': + message_size = atoi(optarg); + break; + case 't': + set_tos = 1; + tos = (uint8_t) atoi(optarg); + break; + case 'p': + port_space = strtol(optarg, NULL, 0); + break; + default: + printf("usage: %s\n", argv[0]); + printf("\t[-s server_address]\n"); + printf("\t[-b bind_address]\n"); + printf("\t[-c connections]\n"); + printf("\t[-C message_count]\n"); + printf("\t[-S message_size]\n"); + printf("\t[-t type_of_service]\n"); + printf("\t[-p port_space - %#x for UDP (default), " + "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB); + exit(1); + } + } + + test.dst_addr = (struct sockaddr *) &test.dst_in; + test.src_addr = (struct sockaddr *) &test.src_in; + test.connects_left = connections; + + test.channel = rdma_create_event_channel(); + if (!test.channel) { + printf("failed to create event channel\n"); + exit(1); + } + + if (alloc_nodes()) + exit(1); + + if 
(dst_addr) + ret = run_client(); + else + ret = run_server(); + + printf("test complete\n"); + destroy_nodes(); + rdma_destroy_event_channel(test.channel); + + printf("return status %d\n", ret); + return ret; +} diff --git a/ulp/librdmacm/include/rdma/rdma_cma.h b/ulp/librdmacm/include/rdma/rdma_cma.h new file mode 100644 index 00000000..3a5a4bb4 --- /dev/null +++ b/ulp/librdmacm/include/rdma/rdma_cma.h @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2005-2009 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenFabrics.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#if !defined(RDMA_CMA_H) +#define RDMA_CMA_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Interfaces based on librdmacm 1.0.8.
+ */ + +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; + +/* + * Upon receiving a device removal event, users must destroy the associated + * RDMA identifier and release all resources allocated with the device. + */ +enum rdma_cm_event_type +{ + RDMA_CM_EVENT_ADDR_RESOLVED, + RDMA_CM_EVENT_ADDR_ERROR, + RDMA_CM_EVENT_ROUTE_RESOLVED, + RDMA_CM_EVENT_ROUTE_ERROR, + RDMA_CM_EVENT_CONNECT_REQUEST, + RDMA_CM_EVENT_CONNECT_RESPONSE, + RDMA_CM_EVENT_CONNECT_ERROR, + RDMA_CM_EVENT_UNREACHABLE, + RDMA_CM_EVENT_REJECTED, + RDMA_CM_EVENT_ESTABLISHED, + RDMA_CM_EVENT_DISCONNECTED, + RDMA_CM_EVENT_DEVICE_REMOVAL, + RDMA_CM_EVENT_MULTICAST_JOIN, + RDMA_CM_EVENT_MULTICAST_ERROR, + RDMA_CM_EVENT_ADDR_CHANGE, + RDMA_CM_EVENT_TIMEWAIT_EXIT +}; + +enum rdma_port_space +{ + RDMA_PS_IPOIB = 0x0002, + RDMA_PS_TCP = 0x0106, + RDMA_PS_UDP = 0x0111, +}; + +/* + * Global qkey value for UDP QPs and multicast groups created via the + * RDMA CM. 
+ */ +#define RDMA_UDP_QKEY 0x01234567 + +struct ib_addr +{ + union ibv_gid sgid; + union ibv_gid dgid; + uint16_t pkey; +}; + +struct rdma_addr +{ + struct sockaddr src_addr; + uint8_t src_pad[sizeof(SOCKADDR_IN6) - + sizeof(struct sockaddr)]; + struct sockaddr dst_addr; + uint8_t dst_pad[sizeof(SOCKADDR_IN6) - + sizeof(struct sockaddr)]; + union + { + struct ib_addr ibaddr; + } addr; +}; + +struct ibv_sa_path_rec +{ + uint8_t data[64]; +}; + +struct rdma_route +{ + struct rdma_addr addr; + struct ibv_sa_path_rec *path_rec; + int num_paths; +}; + +struct rdma_event_channel +{ + uint32_t timeout; +}; + +struct rdma_cm_id +{ + struct ibv_context *verbs; + struct rdma_event_channel *channel; + void *context; + struct ibv_qp *qp; + struct rdma_route route; + enum rdma_port_space ps; + uint8_t port_num; + + union { + IWVConnectEndpoint *connect; + IWVDatagramEndpoint *datagram; + } ep; + OVERLAPPED overlap; + uint32_t events_completed; +}; + +struct rdma_conn_param +{ + const void *private_data; + uint8_t private_data_len; + uint8_t responder_resources; + uint8_t initiator_depth; + uint8_t flow_control; + uint8_t retry_count; /* ignored when accepting */ + uint8_t rnr_retry_count; + /* Fields below ignored if a QP is created on the rdma_cm_id. */ + uint8_t srq; + uint32_t qp_num; +}; + +struct rdma_ud_param +{ + const void *private_data; + uint8_t private_data_len; + struct ibv_ah_attr ah_attr; + uint32_t qp_num; + uint32_t qkey; +}; + +struct rdma_cm_event +{ + struct rdma_cm_id *id; + struct rdma_cm_id *listen_id; + enum rdma_cm_event_type event; + int status; + union + { + struct rdma_conn_param conn; + struct rdma_ud_param ud; + + } param; +}; + +/** + * rdma_create_event_channel - Open a channel used to report communication events. + * Description: + * Asynchronous events are reported to users through event channels. Each + * event channel maps to a file descriptor. 
+ * Notes: + * All created event channels must be destroyed by calling + * rdma_destroy_event_channel. Users should call rdma_get_cm_event to + * retrieve events on an event channel. + * See also: + * rdma_get_cm_event, rdma_destroy_event_channel + */ +__declspec(dllexport) +struct rdma_event_channel *rdma_create_event_channel(void); + +/** + * rdma_destroy_event_channel - Close an event communication channel. + * @channel: The communication channel to destroy. + * Description: + * Release all resources associated with an event channel and closes the + * associated file descriptor. + * Notes: + * All rdma_cm_id's associated with the event channel must be destroyed, + * and all returned events must be acked before calling this function. + * See also: + * rdma_create_event_channel, rdma_get_cm_event, rdma_ack_cm_event + */ +__declspec(dllexport) +void rdma_destroy_event_channel(struct rdma_event_channel *channel); + +/** + * rdma_create_id - Allocate a communication identifier. + * @channel: The communication channel that events associated with the + * allocated rdma_cm_id will be reported on. + * @id: A reference where the allocated communication identifier will be + * returned. + * @context: User specified context associated with the rdma_cm_id. + * @ps: RDMA port space. + * Description: + * Creates an identifier that is used to track communication information. + * Notes: + * Rdma_cm_id's are conceptually equivalent to a socket for RDMA + * communication. The difference is that RDMA communication requires + * explicitly binding to a specified RDMA device before communication + * can occur, and most operations are asynchronous in nature. Communication + * events on an rdma_cm_id are reported through the associated event + * channel. Users must release the rdma_cm_id by calling rdma_destroy_id. 
+ * See also: + * rdma_create_event_channel, rdma_destroy_id, rdma_get_devices, + * rdma_bind_addr, rdma_resolve_addr, rdma_connect, rdma_listen, + */ +__declspec(dllexport) +int rdma_create_id(struct rdma_event_channel *channel, + struct rdma_cm_id **id, void *context, + enum rdma_port_space ps); + +/** + * rdma_destroy_id - Release a communication identifier. + * @id: The communication identifier to destroy. + * Description: + * Destroys the specified rdma_cm_id and cancels any outstanding + * asynchronous operation. + * Notes: + * Users must free any associated QP with the rdma_cm_id before + * calling this routine and ack any related events. + * See also: + * rdma_create_id, rdma_destroy_qp, rdma_ack_cm_event + */ +__declspec(dllexport) +int rdma_destroy_id(struct rdma_cm_id *id); + +/** + * rdma_bind_addr - Bind an RDMA identifier to a source address. + * @id: RDMA identifier. + * @addr: Local address information. Wildcard values are permitted. + * Description: + * Associates a source address with an rdma_cm_id. The address may be + * wildcarded. If binding to a specific local address, the rdma_cm_id + * will also be bound to a local RDMA device. + * Notes: + * Typically, this routine is called before calling rdma_listen to bind + * to a specific port number, but it may also be called on the active side + * of a connection before calling rdma_resolve_addr to bind to a specific + * address. + * See also: + * rdma_create_id, rdma_listen, rdma_resolve_addr, rdma_create_qp + */ +__declspec(dllexport) +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); + +/** + * rdma_resolve_addr - Resolve destination and optional source addresses. + * @id: RDMA identifier. + * @src_addr: Source address information. This parameter may be NULL. + * @dst_addr: Destination address information. + * @timeout_ms: Time to wait for resolution to complete. + * Description: + * Resolve destination and optional source addresses from IP addresses + * to an RDMA address.
If successful, the specified rdma_cm_id will + * be bound to a local device. + * Notes: + * This call is used to map a given destination IP address to a usable RDMA + * address. If a source address is given, the rdma_cm_id is bound to that + * address, the same as if rdma_bind_addr were called. If no source + * address is given, and the rdma_cm_id has not yet been bound to a device, + * then the rdma_cm_id will be bound to a source address based on the + * local routing tables. After this call, the rdma_cm_id will be bound to + * an RDMA device. This call is typically made from the active side of a + * connection before calling rdma_resolve_route and rdma_connect. + * See also: + * rdma_create_id, rdma_resolve_route, rdma_connect, rdma_create_qp, + * rdma_get_cm_event, rdma_bind_addr + */ +__declspec(dllexport) +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms); + +/** + * rdma_resolve_route - Resolve the route information needed to establish a connection. + * @id: RDMA identifier. + * @timeout_ms: Time to wait for resolution to complete. + * Description: + * Resolves an RDMA route to the destination address in order to establish + * a connection. The destination address must have already been resolved + * by calling rdma_resolve_addr. + * Notes: + * This is called on the client side of a connection after calling + * rdma_resolve_addr, but before calling rdma_connect. + * See also: + * rdma_resolve_addr, rdma_connect, rdma_get_cm_event + */ +__declspec(dllexport) +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); + +/** + * rdma_create_qp - Allocate a QP. + * @id: RDMA identifier. + * @pd: protection domain for the QP. + * @qp_init_attr: initial QP attributes. + * Description: + * Allocate a QP associated with the specified rdma_cm_id and transition it + * for sending and receiving. 
+ * Notes: + * The rdma_cm_id must be bound to a local RDMA device before calling this + * function, and the protection domain must be for that same device. + * QPs allocated to an rdma_cm_id are automatically transitioned by the + * librdmacm through their states. After being allocated, the QP will be + * ready to handle posting of receives. If the QP is unconnected, it will + * be ready to post sends. + * See also: + * rdma_bind_addr, rdma_resolve_addr, rdma_destroy_qp, ibv_create_qp, + * ibv_modify_qp + */ +__declspec(dllexport) +int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr); + +/** + * rdma_destroy_qp - Deallocate a QP. + * @id: RDMA identifier. + * Description: + * Destroy a QP allocated on the rdma_cm_id. + * Notes: + * Users must destroy any QP associated with an rdma_cm_id before + * destroying the ID. + * See also: + * rdma_create_qp, rdma_destroy_id, ibv_destroy_qp + */ +__declspec(dllexport) +void rdma_destroy_qp(struct rdma_cm_id *id); + +/** + * rdma_connect - Initiate an active connection request. + * @id: RDMA identifier. + * @conn_param: connection parameters. + * Description: + * For a connected rdma_cm_id, this call initiates a connection request + * to a remote destination. For an unconnected rdma_cm_id, it initiates + * a lookup of the remote QP providing the datagram service. + * Notes: + * Users must have resolved a route to the destination address + * by having called rdma_resolve_route before calling this routine. + * See also: + * rdma_resolve_route, rdma_disconnect, rdma_listen, rdma_get_cm_event + */ +__declspec(dllexport) +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); + +/** + * rdma_listen - Listen for incoming connection requests. + * @id: RDMA identifier. + * @backlog: backlog of incoming connection requests. + * Description: + * Initiates a listen for incoming connection requests or datagram service + * lookup. 
The listen will be restricted to the locally bound source + * address. + * Notes: + * Users must have bound the rdma_cm_id to a local address by calling + * rdma_bind_addr before calling this routine. If the rdma_cm_id is + * bound to a specific IP address, the listen will be restricted to that + * address and the associated RDMA device. If the rdma_cm_id is bound + * to an RDMA port number only, the listen will occur across all RDMA + * devices. + * See also: + * rdma_bind_addr, rdma_connect, rdma_accept, rdma_reject, rdma_get_cm_event + */ +__declspec(dllexport) +int rdma_listen(struct rdma_cm_id *id, int backlog); + +/** + * rdma_accept - Called to accept a connection request. + * @id: Connection identifier associated with the request. + * @conn_param: Information needed to establish the connection. + * Description: + * Called from the listening side to accept a connection or datagram + * service lookup request. + * Notes: + * Unlike the socket accept routine, rdma_accept is not called on a + * listening rdma_cm_id. Instead, after calling rdma_listen, the user + * waits for a connection request event to occur. Connection request + * events give the user a newly created rdma_cm_id, similar to a new + * socket, but the rdma_cm_id is bound to a specific RDMA device. + * rdma_accept is called on the new rdma_cm_id. + * See also: + * rdma_listen, rdma_reject, rdma_get_cm_event + */ +__declspec(dllexport) +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); + +/** + * rdma_reject - Called to reject a connection request. + * @id: Connection identifier associated with the request. + * @private_data: Optional private data to send with the reject message. + * @private_data_len: Size of the private_data to send, in bytes. + * Description: + * Called from the listening side to reject a connection or datagram + * service lookup request. + * Notes: + * After receiving a connection request event, a user may call rdma_reject + * to reject the request. 
If the underlying RDMA transport supports + * private data in the reject message, the specified data will be passed to + * the remote side. + * See also: + * rdma_listen, rdma_accept, rdma_get_cm_event + */ +__declspec(dllexport) +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + uint8_t private_data_len); + +/** + * rdma_notify - Notifies the librdmacm of an asynchronous event. + * @id: RDMA identifier. + * @event: Asynchronous event. + * Description: + * Used to notify the librdmacm of asynchronous events that have occurred + * on a QP associated with the rdma_cm_id. + * Notes: + * Asynchronous events that occur on a QP are reported through the user's + * device event handler. This routine is used to notify the librdmacm of + * communication events. In most cases, use of this routine is not + * necessary, however if connection establishment is done out of band + * (such as done through Infiniband), it's possible to receive data on a + * QP that is not yet considered connected. This routine forces the + * connection into an established state in this case in order to handle + * the rare situation where the connection never forms on its own. + * Events that should be reported to the CM are: IB_EVENT_COMM_EST. + * See also: + * rdma_connect, rdma_accept, rdma_listen + */ +__declspec(dllexport) +int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event); + +/** + * rdma_disconnect - This function disconnects a connection. + * @id: RDMA identifier. + * Description: + * Disconnects a connection and transitions any associated QP to the + * error state. + * See also: + * rdma_connect, rdma_listen, rdma_accept + */ +__declspec(dllexport) +int rdma_disconnect(struct rdma_cm_id *id); + +/** + * rdma_join_multicast - Joins a multicast group. + * @id: Communication identifier associated with the request. + * @addr: Multicast address identifying the group to join. + * @context: User-defined context associated with the join request. 
+ * Description: + * Joins a multicast group and attaches an associated QP to the group. + * Notes: + * Before joining a multicast group, the rdma_cm_id must be bound to + * an RDMA device by calling rdma_bind_addr or rdma_resolve_addr. Use of + * rdma_resolve_addr requires the local routing tables to resolve the + * multicast address to an RDMA device. The user must call + * rdma_leave_multicast to leave the multicast group and release any + * multicast resources. The context is returned to the user through + * the private_data field in the rdma_cm_event. + * See also: + * rdma_leave_multicast, rdma_bind_addr, rdma_resolve_addr, rdma_create_qp + */ +__declspec(dllexport) +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + void *context); + +/** + * rdma_leave_multicast - Leaves a multicast group. + * @id: Communication identifier associated with the request. + * @addr: Multicast address identifying the group to leave. + * Description: + * Leaves a multicast group and detaches an associated QP from the group. + * Notes: + * Calling this function before a group has been fully joined results in + * canceling the join operation. Users should be aware that messages + * received from the multicast group may still be queued for + * completion processing immediately after leaving a multicast group. + * Destroying an rdma_cm_id will automatically leave all multicast groups. + * See also: + * rdma_join_multicast, rdma_destroy_qp + */ +__declspec(dllexport) +int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); + +/** + * rdma_get_cm_event - Retrieves the next pending communication event. + * @channel: Event channel to check for events. + * @event: Allocated information about the next communication event. + * Description: + * Retrieves a communication event. If no events are pending, by default, + * the call will block until an event is received.
+ * Notes: + * The default synchronous behavior of this routine can be changed by + * modifying the file descriptor associated with the given channel. All + * events that are reported must be acknowledged by calling rdma_ack_cm_event. + * Destruction of an rdma_cm_id will block until related events have been + * acknowledged. + * See also: + * rdma_ack_cm_event, rdma_create_event_channel, rdma_event_str + */ +__declspec(dllexport) +int rdma_get_cm_event(struct rdma_event_channel *channel, + struct rdma_cm_event **event); + +/** + * rdma_ack_cm_event - Free a communication event. + * @event: Event to be released. + * Description: + * All events which are allocated by rdma_get_cm_event must be released, + * there should be a one-to-one correspondence between successful gets + * and acks. + * See also: + * rdma_get_cm_event, rdma_destroy_id + */ +__declspec(dllexport) +int rdma_ack_cm_event(struct rdma_cm_event *event); + +static uint16_t rdma_get_src_port(struct rdma_cm_id *id) +{ + return id->route.addr.src_addr.sa_family == PF_INET6 ? + ((struct sockaddr_in6 *) &id->route.addr.src_addr)->sin6_port : + ((struct sockaddr_in *) &id->route.addr.src_addr)->sin_port; +} + +static uint16_t rdma_get_dst_port(struct rdma_cm_id *id) +{ + return id->route.addr.dst_addr.sa_family == PF_INET6 ? + ((struct sockaddr_in6 *) &id->route.addr.dst_addr)->sin6_port : + ((struct sockaddr_in *) &id->route.addr.dst_addr)->sin_port; +} + +static struct sockaddr *rdma_get_local_addr(struct rdma_cm_id *id) +{ + return &id->route.addr.src_addr; +} + +static struct sockaddr *rdma_get_peer_addr(struct rdma_cm_id *id) +{ + return &id->route.addr.dst_addr; +} + +/** + * rdma_get_devices - Get list of RDMA devices currently available. + * @num_devices: If non-NULL, set to the number of devices returned. + * Description: + * Return a NULL-terminated array of opened RDMA devices. 
Callers can use + * this routine to allocate resources on specific RDMA devices that will be + * shared across multiple rdma_cm_id's. + * Notes: + * The returned array must be released by calling rdma_free_devices. Devices + * remain opened while the librdmacm is loaded. + * See also: + * rdma_free_devices + */ +__declspec(dllexport) +struct ibv_context **rdma_get_devices(int *num_devices); + +/** + * rdma_free_devices - Frees the list of devices returned by rdma_get_devices. + * @list: List of devices returned from rdma_get_devices. + * Description: + * Frees the device array returned by rdma_get_devices. + * See also: + * rdma_get_devices + */ +__declspec(dllexport) +void rdma_free_devices(struct ibv_context **list); + +/** + * rdma_event_str - Returns a string representation of an rdma cm event. + * @event: Asynchronous event. + * Description: + * Returns a string representation of an asynchronous event. + * See also: + * rdma_get_cm_event + */ +__declspec(dllexport) +const char *rdma_event_str(enum rdma_cm_event_type event); + +/* Option levels */ +enum +{ + RDMA_OPTION_ID = 0 +}; + +/* Option details */ +enum +{ + RDMA_OPTION_ID_TOS = 0 /* uint8_t: RFC 2474 */ +}; + +/** + * rdma_set_option - Set options for an rdma_cm_id. + * @id: Communication identifier to set option for. + * @level: Protocol level of the option to set. + * @optname: Name of the option to set. + * @optval: Reference to the option data. + * @optlen: The size of the %optval buffer. + */ +__declspec(dllexport) +int rdma_set_option(struct rdma_cm_id *id, int level, int optname, + void *optval, size_t optlen); + +/** + * rdma_migrate_id - Move an rdma_cm_id to a new event channel. + * @id: Communication identifier to migrate. + * @channel: New event channel for rdma_cm_id events. 
+ */ +__declspec(dllexport) +int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel); + +#ifdef __cplusplus +} +#endif + +#endif /* RDMA_CMA_H */ diff --git a/ulp/librdmacm/src/Sources b/ulp/librdmacm/src/Sources new file mode 100644 index 00000000..3a771696 --- /dev/null +++ b/ulp/librdmacm/src/Sources @@ -0,0 +1,40 @@ +!if $(FREEBUILD) +TARGETNAME = librdmacm +!else +TARGETNAME = librdmacmd +!endif + +TARGETPATH = ..\..\..\bin\user\obj$(BUILD_ALT_DIR) +TARGETTYPE = DYNLINK + +!if $(_NT_TOOLS_VERSION) == 0x700 +DLLDEF = $O\cma_exports.def +!else +DLLDEF = $(OBJ_PATH)\$O\cma_exports.def +!endif + +DLLENTRY = DllMain +USE_MSVCRT = 1 + +SOURCES = \ + cma.rc \ + cma_main.cpp \ + cma.cpp + +INCLUDES = ..\include;..\..\..\inc;..\..\..\inc\user;..\..\libibverbs\include + +USER_C_FLAGS = $(USER_C_FLAGS) -DEXPORT_CMA_SYMBOLS + +TARGETLIBS = \ + $(SDK_LIB_PATH)\kernel32.lib \ + $(SDK_LIB_PATH)\uuid.lib \ + $(SDK_LIB_PATH)\ws2_32.lib \ + $(SDK_LIB_PATH)\iphlpapi.lib \ + $(TARGETPATH)\*\ibat.lib \ +!if $(FREEBUILD) + $(TARGETPATH)\*\libibverbs.lib \ + $(TARGETPATH)\*\winverbs.lib +!else + $(TARGETPATH)\*\libibverbsd.lib \ + $(TARGETPATH)\*\winverbsd.lib +!endif diff --git a/ulp/librdmacm/src/cma.cpp b/ulp/librdmacm/src/cma.cpp new file mode 100644 index 00000000..2ce0cd24 --- /dev/null +++ b/ulp/librdmacm/src/cma.cpp @@ -0,0 +1,1084 @@ +/* + * Copyright (c) 2005-2009 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include "cma.h" + +IWVProvider *prov; +__declspec(dllexport) +IWVProvider *ibv_get_winverbs(void); + +enum cma_state +{ + cma_idle, + cma_listening, + cma_get_request, + cma_addr_resolve, + cma_route_resolve, + cma_passive_connect, + cma_active_connect, + cma_active_accept, + cma_accepting, + cma_connected, + cma_active_disconnect, + cma_passive_disconnect, + cma_disconnected +}; + +#define CMA_DEFAULT_BACKLOG 16 + +struct cma_id_private +{ + struct rdma_cm_id id; + enum cma_state state; + int channel_index; + struct cma_device *cma_dev; + int backlog; + int index; + struct rdma_cm_id **req_list; +}; + +struct cma_event_channel +{ + struct rdma_event_channel channel; + CRITICAL_SECTION lock; + struct cma_id_private *id[MAXIMUM_WAIT_OBJECTS]; + HANDLE event[MAXIMUM_WAIT_OBJECTS]; + int count; +}; + +struct cma_device +{ + struct ibv_context *verbs; + uint64_t guid; + int port_cnt; + uint8_t max_initiator_depth; + uint8_t max_responder_resources; +}; + +struct cma_event { + struct rdma_cm_event event; + uint8_t private_data[56]; + struct cma_id_private *id_priv; +}; + +static struct cma_device *cma_dev_array; +static int cma_dev_cnt; + +static void ucma_cleanup(void) +{ + if
(cma_dev_cnt > 0) { + while (cma_dev_cnt > 0) { + ibv_close_device(cma_dev_array[--cma_dev_cnt].verbs); + } + delete cma_dev_array; + cma_dev_cnt = 0; + } + if (prov != NULL) { + prov->Release(); + prov = NULL; + } +} + +static int ucma_init(void) +{ + struct ibv_device **dev_list = NULL; + struct cma_device *cma_dev; + struct ibv_device_attr attr; + int i, ret; + + EnterCriticalSection(&lock); + if (cma_dev_cnt > 0) { + goto out; + } + + prov = ibv_get_winverbs(); + if (prov == NULL) { + ret = -1; + goto err; + } + + dev_list = ibv_get_device_list(&cma_dev_cnt); + if (dev_list == NULL) { + ret = -1; + goto err; + } + + cma_dev_array = new struct cma_device[cma_dev_cnt]; + if (cma_dev_array == NULL) { + ret = -1; + goto err; + } + + for (i = 0; dev_list[i]; ++i) { + cma_dev = &cma_dev_array[i]; + + cma_dev->guid = ibv_get_device_guid(dev_list[i]); + cma_dev->verbs = ibv_open_device(dev_list[i]); + if (cma_dev->verbs == NULL) { + ret = -1; + goto err; + } + + ret = ibv_query_device(cma_dev->verbs, &attr); + if (ret) { + goto err; + } + + cma_dev->port_cnt = attr.phys_port_cnt; + cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom; + cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom; + } + ibv_free_device_list(dev_list); +out: + LeaveCriticalSection(&lock); + return 0; + +err: + ucma_cleanup(); + LeaveCriticalSection(&lock); + if (dev_list) { + ibv_free_device_list(dev_list); + } + return ret; +} + +__declspec(dllexport) +struct ibv_context **rdma_get_devices(int *num_devices) +{ + struct ibv_context **devs = NULL; + int i; + + if (!cma_dev_cnt && ucma_init()) { + goto out; + } + + devs = new struct ibv_context *[cma_dev_cnt + 1]; + if (devs == NULL) { + goto out; + } + + for (i = 0; i < cma_dev_cnt; i++) { + devs[i] = cma_dev_array[i].verbs; + } + devs[i] = NULL; +out: + if (num_devices != NULL) { + *num_devices = devs ? 
cma_dev_cnt : 0; + } + return devs; +} + +__declspec(dllexport) +void rdma_free_devices(struct ibv_context **list) +{ + delete list; +} + +__declspec(dllexport) +struct rdma_event_channel *rdma_create_event_channel(void) +{ + struct cma_event_channel *chan; + + if (!cma_dev_cnt && ucma_init()) { + return NULL; + } + + chan = new struct cma_event_channel; + if (chan == NULL) { + return NULL; + } + + InitializeCriticalSection(&chan->lock); + chan->count = 0; + chan->channel.timeout = INFINITE; + + return &chan->channel; +} + +__declspec(dllexport) +void rdma_destroy_event_channel(struct rdma_event_channel *channel) +{ + struct cma_event_channel *chan; + + chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel); + DeleteCriticalSection(&chan->lock); + delete chan; +} + +static int cma_event_channel_insert_id(struct rdma_event_channel *channel, + struct cma_id_private *id_priv) +{ + struct cma_event_channel *chan; + int ret = 0; + + chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel); + + EnterCriticalSection(&chan->lock); + if (chan->count == MAXIMUM_WAIT_OBJECTS) { + ret = -1; + goto out; + } + + chan->id[chan->count] = id_priv; + chan->event[chan->count] = id_priv->id.overlap.hEvent; + id_priv->channel_index = chan->count++; +out: + LeaveCriticalSection(&chan->lock); + return ret; +} + +/* + * TODO: we cannot call cma_event_channel_remove_id() while another + * thread is calling rdma_get_event(). If this is needed, then we + * need to halt the rdma_get_event() thread, modify the event list, + * then restart the rdma_get_event() thread. 
+ */
+static void cma_event_channel_remove_id(struct rdma_event_channel *channel,
+					struct cma_id_private *id_priv)
+{
+	struct cma_event_channel *chan;
+
+	chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel);
+
+	EnterCriticalSection(&chan->lock);
+	/* Fill the vacated slot with the last entry and fix up its index. */
+	chan->count--;
+	chan->id[id_priv->channel_index] = chan->id[chan->count];
+	chan->event[id_priv->channel_index] = chan->event[chan->count];
+	chan->id[id_priv->channel_index]->channel_index = id_priv->channel_index;
+	LeaveCriticalSection(&chan->lock);
+}
+
+/*
+ * Allocate a new communication identifier on 'channel'.
+ * Creates the overlapped event and the connect (RDMA_PS_TCP) or datagram
+ * endpoint, then registers the id with the channel.  On success stores the
+ * new id in *id and returns 0; on failure returns nonzero and does not
+ * write *id.
+ */
+__declspec(dllexport)
+int rdma_create_id(struct rdma_event_channel *channel,
+		   struct rdma_cm_id **id, void *context,
+		   enum rdma_port_space ps)
+{
+	struct cma_id_private *id_priv;
+	HRESULT hr;
+
+	hr = cma_dev_cnt ? 0 : ucma_init();
+	if (hr) {
+		return hr;
+	}
+
+	id_priv = new struct cma_id_private;
+	if (id_priv == NULL) {
+		/* Must be nonzero: 0 means success and *id has not been set. */
+		return -1;
+	}
+
+	RtlZeroMemory(id_priv, sizeof(struct cma_id_private));
+	id_priv->id.overlap.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+	if (id_priv->id.overlap.hEvent == NULL) {
+		goto err1;
+	}
+
+	id_priv->id.context = context;
+	id_priv->id.channel = channel;
+	id_priv->id.ps = ps;
+
+	if (ps == RDMA_PS_TCP) {
+		hr = prov->CreateConnectEndpoint(&id_priv->id.ep.connect);
+	} else {
+		hr = prov->CreateDatagramEndpoint(&id_priv->id.ep.datagram);
+	}
+	if (FAILED(hr)) {
+		goto err2;
+	}
+
+	hr = cma_event_channel_insert_id(channel, id_priv);
+	if (FAILED(hr)) {
+		goto err3;
+	}
+
+	*id = &id_priv->id;
+	return 0;
+
+err3:
+	if (ps == RDMA_PS_TCP) {
+		id_priv->id.ep.connect->Release();
+	} else {
+		id_priv->id.ep.datagram->Release();
+	}
+err2:
+	CloseHandle(id_priv->id.overlap.hEvent);
+err1:
+	delete id_priv;
+	return -1;
+}
+
+static void ucma_destroy_listen(struct cma_id_private *id_priv)
+{
+	while (--id_priv->backlog >= 0) {
+		if (id_priv->req_list[id_priv->backlog] != NULL) {
+			rdma_destroy_id(id_priv->req_list[id_priv->backlog]);
+		}
+	}
+
+	delete id_priv->req_list;
+}
+
+__declspec(dllexport)
+int
rdma_destroy_id(struct rdma_cm_id *id) +{ + struct cma_id_private *id_priv; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + if (id->ps == RDMA_PS_TCP) { + id->ep.connect->CancelOverlappedRequests(); + } else { + id->ep.datagram->CancelOverlappedRequests(); + } + + cma_event_channel_remove_id(id->channel, id_priv); + + if (id_priv->backlog > 0) { + ucma_destroy_listen(id_priv); + } + + if (id_priv->id.ps == RDMA_PS_TCP) { + id_priv->id.ep.connect->Release(); + } else { + id_priv->id.ep.datagram->Release(); + } + + delete id_priv; + return 0; +} + +static int ucma_addrlen(struct sockaddr *addr) +{ + if (addr->sa_family == PF_INET) { + return sizeof(struct sockaddr_in); + } else { + return sizeof(struct sockaddr_in6); + } +} + +static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid) +{ + struct cma_device *cma_dev; + int i; + + for (i = 0; i < cma_dev_cnt; i++) { + cma_dev = &cma_dev_array[i]; + if (cma_dev->guid == guid) { + id_priv->cma_dev = cma_dev; + id_priv->id.verbs = cma_dev->verbs; + return 0; + } + } + return -1; +} + +static int ucma_query_connect(struct rdma_cm_id *id, struct rdma_conn_param *param) +{ + struct cma_id_private *id_priv; + WV_CONNECT_ATTRIBUTES attr; + HRESULT hr; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + hr = id->ep.connect->Query(&attr); + if (FAILED(hr)) { + return hr; + } + + RtlCopyMemory(&id->route.addr.src_addr, &attr.LocalAddress, + sizeof attr.LocalAddress); + RtlCopyMemory(&id->route.addr.dst_addr, &attr.PeerAddress, + sizeof attr.PeerAddress); + + if (param != NULL) { + RtlCopyMemory((void *) param->private_data, attr.Param.Data, + attr.Param.DataLength); + param->private_data_len = (uint8_t) attr.Param.DataLength; + param->responder_resources = (uint8_t) attr.Param.ResponderResources; + param->initiator_depth = (uint8_t) attr.Param.InitiatorDepth; + param->flow_control = 1; + param->retry_count = attr.Param.RetryCount; + param->rnr_retry_count = attr.Param.RnrRetryCount; 
+ } + + if (id_priv->cma_dev == NULL && attr.Device.DeviceGuid != 0) { + hr = ucma_get_device(id_priv, attr.Device.DeviceGuid); + if (FAILED(hr)) { + return hr; + } + + id->route.addr.addr.ibaddr.pkey = attr.Device.Pkey; + id_priv->id.port_num = attr.Device.PortNumber; + } + + return 0; +} + +static int ucma_query_datagram(struct rdma_cm_id *id, struct rdma_ud_param *param) +{ + struct cma_id_private *id_priv; + WV_DATAGRAM_ATTRIBUTES attr; + HRESULT hr; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + hr = id->ep.datagram->Query(&attr); + if (FAILED(hr)) { + return hr; + } + + RtlCopyMemory(&id->route.addr.src_addr, &attr.LocalAddress, + sizeof attr.LocalAddress); + RtlCopyMemory(&id->route.addr.dst_addr, &attr.PeerAddress, + sizeof attr.PeerAddress); + + if (param != NULL) { + RtlCopyMemory((void *) param->private_data, attr.Param.Data, + attr.Param.DataLength); + param->private_data_len = (uint8_t) attr.Param.DataLength; + // ucma_convert_av(&attr.Param.AddressVector, param->ah_attr) + param->qp_num = attr.Param.Qpn; + param->qkey = attr.Param.Qkey; + } + + if (id_priv->cma_dev == NULL && attr.Device.DeviceGuid != 0) { + hr = ucma_get_device(id_priv, attr.Device.DeviceGuid); + if (FAILED(hr)) + return hr; + id->route.addr.addr.ibaddr.pkey = attr.Device.Pkey; + id_priv->id.port_num = attr.Device.PortNumber; + } + return 0; +} + +__declspec(dllexport) +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + HRESULT hr; + + if (id->ps == RDMA_PS_TCP) { + hr = id->ep.connect->BindAddress(addr); + if (SUCCEEDED(hr)) { + hr = ucma_query_connect(id, NULL); + } + } else { + hr = id->ep.datagram->BindAddress(addr); + if (SUCCEEDED(hr)) { + hr = ucma_query_datagram(id, NULL); + } + } + + return hr; +} + +__declspec(dllexport) +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms) +{ + struct cma_id_private *id_priv; + WV_SOCKADDR addr; + SOCKET s; + DWORD size; + HRESULT hr; + + 
if (src_addr == NULL) { + if (id->ps == RDMA_PS_TCP) { + s = socket(dst_addr->sa_family, SOCK_STREAM, IPPROTO_TCP); + } else { + s = socket(dst_addr->sa_family, SOCK_DGRAM, IPPROTO_UDP); + } + if (s == INVALID_SOCKET) { + return WSAGetLastError(); + } + + hr = WSAIoctl(s, SIO_ROUTING_INTERFACE_QUERY, dst_addr, ucma_addrlen(dst_addr), + &addr, sizeof addr, &size, NULL, NULL); + closesocket(s); + if (FAILED(hr)) { + return WSAGetLastError(); + } + src_addr = &addr.Sa; + } + + hr = rdma_bind_addr(id, src_addr); + if (FAILED(hr)) { + return hr; + } + + RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr)); + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + id_priv->state = cma_addr_resolve; + SetEvent(id->overlap.hEvent); + return 0; +} + +__declspec(dllexport) +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) +{ + struct cma_id_private *id_priv; + IBAT_PATH_BLOB path; + HRESULT hr; + + hr = IBAT::Resolve(&id->route.addr.src_addr, &id->route.addr.dst_addr, &path); + if (FAILED(hr)) { + return hr; + } + + hr = (id->ps == RDMA_PS_TCP) ? 
+ id->ep.connect->Modify(WV_EP_OPTION_ROUTE, &path, sizeof path) : + id->ep.datagram->Modify(WV_EP_OPTION_ROUTE, &path, sizeof path); + if (FAILED(hr)) { + return hr; + } + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + id_priv->state = cma_route_resolve; + SetEvent(id->overlap.hEvent); + return 0; +} + +static int ucma_modify_qp_init(struct cma_id_private *id_priv, struct ibv_qp *qp) +{ + struct ibv_qp_attr qp_attr; + DWORD index; + HRESULT hr; + + RtlZeroMemory(&qp_attr, sizeof qp_attr); + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.port_num = id_priv->id.port_num; + hr = qp->context->cmd_if->FindPkey(id_priv->id.port_num, + id_priv->id.route.addr.addr.ibaddr.pkey, + &index); + if (FAILED(hr)) { + return hr; + } + + qp_attr.pkey_index = (uint16_t) index; + return ibv_modify_qp(qp, &qp_attr, (enum ibv_qp_attr_mask) + (IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT)); +} + +static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) +{ + struct ibv_qp_attr qp_attr; + int qp_attr_mask, ret; + + ret = ucma_modify_qp_init(id_priv, qp); + if (ret) { + return ret; + } + + qp_attr.qp_state = IBV_QPS_RTR; + ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); + if (ret) { + return ret; + } + + qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 0; + return ibv_modify_qp(qp, &qp_attr, (enum ibv_qp_attr_mask) + (IBV_QP_STATE | IBV_QP_SQ_PSN)); +} + +__declspec(dllexport) +int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr) +{ + struct cma_id_private *id_priv; + struct ibv_qp *qp; + int ret; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + if (id->verbs != pd->context) { + return -1; + } + + qp = ibv_create_qp(pd, qp_init_attr); + if (!qp) { + return -1; + } + + if (id->ps == RDMA_PS_TCP) { + ret = ucma_modify_qp_init(id_priv, qp); + } else { + ret = ucma_init_ud_qp(id_priv, qp); + } + if (ret) { + goto err; + } + + id->qp = qp; + return 0; +err: + ibv_destroy_qp(qp); + return 
ret; +} + +__declspec(dllexport) +void rdma_destroy_qp(struct rdma_cm_id *id) +{ + ibv_destroy_qp(id->qp); +} + +static int ucma_valid_param(struct cma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + if (id_priv->id.ps != RDMA_PS_TCP) { + return 0; + } + + if ((conn_param->responder_resources > id_priv->cma_dev->max_responder_resources) || + (conn_param->initiator_depth > id_priv->cma_dev->max_initiator_depth)) { + return -1; + } + + return 0; +} + +__declspec(dllexport) +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct cma_id_private *id_priv; + WV_CONNECT_PARAM attr; + HRESULT hr; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + hr = ucma_valid_param(id_priv, conn_param); + if (FAILED(hr)) { + return hr; + } + + RtlZeroMemory(&attr, sizeof attr); + attr.ResponderResources = conn_param->responder_resources; + attr.InitiatorDepth = conn_param->initiator_depth; + attr.RetryCount = conn_param->retry_count; + attr.RnrRetryCount = conn_param->rnr_retry_count; + if ((attr.DataLength = conn_param->private_data_len)) { + RtlCopyMemory(attr.Data, conn_param->private_data, attr.DataLength); + } + + id_priv->state = cma_active_connect; + hr = id->ep.connect->Connect(id->qp->conn_handle, &id->route.addr.dst_addr, + &attr, &id->overlap); + if (FAILED(hr) && hr != WV_IO_PENDING) { + id_priv->state = cma_route_resolve; + return hr; + } + + return 0; +} + +static int ucma_get_request(struct cma_id_private *listen, int index) +{ + struct cma_id_private *id_priv; + HRESULT hr; + + hr = rdma_create_id(listen->id.channel, &listen->req_list[index], + listen, listen->id.ps); + if (FAILED(hr)) { + return hr; + } + + id_priv = CONTAINING_RECORD(listen->req_list[index], struct cma_id_private, id); + id_priv->index = index; + id_priv->state = cma_get_request; + + if (listen->id.ps == RDMA_PS_TCP) { + hr = listen->id.ep.connect->GetRequest(id_priv->id.ep.connect, + &id_priv->id.overlap); + } else { + hr = 
listen->id.ep.datagram->GetRequest(id_priv->id.ep.datagram, + &id_priv->id.overlap); + } + + return (FAILED(hr) && hr != WV_IO_PENDING) ? hr : 0; +} + +__declspec(dllexport) +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ + struct cma_id_private *id_priv, *req_id; + HRESULT hr; + int i; + + if (backlog <= 0 || backlog > CMA_DEFAULT_BACKLOG) { + backlog = CMA_DEFAULT_BACKLOG; + } + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + id_priv->req_list = new struct rdma_cm_id*[backlog]; + if (id_priv->req_list == NULL) { + return -1; + } + + RtlZeroMemory(id_priv->req_list, sizeof(struct rdma_cm_id *) * backlog); + id_priv->backlog = backlog; + + id_priv->state = cma_listening; + hr = (id->ps == RDMA_PS_TCP) ? + id->ep.connect->Listen(backlog) : id->ep.datagram->Listen(backlog); + if (FAILED(hr)) { + return hr; + } + + for (i = 0; i < backlog; i++) { + hr = ucma_get_request(id_priv, i); + if (FAILED(hr)) { + return hr; + } + } + + return 0; +} + +__declspec(dllexport) +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct cma_id_private *id_priv; + WV_CONNECT_PARAM attr; + HRESULT hr; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + hr = ucma_valid_param(id_priv, conn_param); + if (FAILED(hr)) { + return hr; + } + + RtlZeroMemory(&attr, sizeof attr); + attr.ResponderResources = conn_param->responder_resources; + attr.InitiatorDepth = conn_param->initiator_depth; + attr.RetryCount = conn_param->retry_count; + attr.RnrRetryCount = conn_param->rnr_retry_count; + if ((attr.DataLength = conn_param->private_data_len)) { + RtlCopyMemory(attr.Data, conn_param->private_data, attr.DataLength); + } + + id_priv->state = cma_accepting; + hr = id->ep.connect->Accept(id->qp->conn_handle, &attr, &id->overlap); + if (FAILED(hr) && hr != WV_IO_PENDING) { + id_priv->state = cma_disconnected; + return hr; + } + + return 0; +} + +__declspec(dllexport) +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + 
uint8_t private_data_len) +{ + struct cma_id_private *id_priv; + HRESULT hr; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + id_priv->state = cma_disconnected; + hr = id->ep.connect->Reject(private_data, private_data_len); + if (FAILED(hr)) { + return hr; + } + return 0; +} + +__declspec(dllexport) +int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event) +{ + return 0; +} + +__declspec(dllexport) +int rdma_disconnect(struct rdma_cm_id *id) +{ + struct cma_id_private *id_priv; + HRESULT hr; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + if (id_priv->state == cma_connected) { + id_priv->state = cma_active_disconnect; + } else { + id_priv->state = cma_disconnected; + } + hr = id->ep.connect->Disconnect(); + if (FAILED(hr)) { + return hr; + } + + return 0; +} + +__declspec(dllexport) +int rdma_ack_cm_event(struct rdma_cm_event *event) +{ + struct cma_event *evt; + + evt = CONTAINING_RECORD(event, struct cma_event, event); + delete evt; + return 0; +} + +static int ucma_process_conn_req(struct cma_event *event) +{ + struct cma_id_private *listen; + HRESULT hr = 0; + + listen = (struct cma_id_private *) event->id_priv->id.context; + + if (SUCCEEDED(event->event.status)) { + event->event.status = ucma_query_connect(&event->id_priv->id, + &event->event.param.conn); + } + + if (SUCCEEDED(event->event.status)) { + event->event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + event->id_priv->state = cma_passive_connect; + + listen->req_list[event->id_priv->index] = NULL; + ucma_get_request(listen, event->id_priv->index); + } else { + hr = listen->id.ep.connect->GetRequest(event->id_priv->id.ep.connect, + &event->id_priv->id.overlap); + if (hr == WV_IO_PENDING) { + hr = 0; + } + } + + return hr; +} + +static int ucma_process_conn_resp(struct cma_event *event) +{ + struct rdma_cm_id *id; + WV_CONNECT_PARAM attr; + HRESULT hr; + + if (FAILED(event->event.status)) { + goto err; + } + + RtlZeroMemory(&attr, sizeof(attr)); + 
event->id_priv->state = cma_accepting; + + id = &event->id_priv->id; + hr = id->ep.connect->Accept(id->qp->conn_handle, &attr, &id->overlap); + if (FAILED(hr) && hr != WV_IO_PENDING) { + event->event.status = hr; + goto err; + } + + return WV_IO_PENDING; + +err: + event->event.event = (event->event.status == WV_REJECTED) ? + RDMA_CM_EVENT_REJECTED : + RDMA_CM_EVENT_CONNECT_ERROR; + event->id_priv->state = cma_disconnected; + return 0; +} + +static void ucma_process_establish(struct cma_event *event) +{ + if (SUCCEEDED(event->event.status)) { + event->event.status = ucma_query_connect(&event->id_priv->id, + &event->event.param.conn); + } + + if (SUCCEEDED(event->event.status)) { + event->event.event = RDMA_CM_EVENT_ESTABLISHED; + event->id_priv->state = cma_connected; + + event->id_priv->id.ep.connect->NotifyDisconnect(&event->id_priv->id.overlap); + } else { + event->event.event = RDMA_CM_EVENT_CONNECT_ERROR; + event->id_priv->state = cma_disconnected; + } +} + +static int ucma_process_event(struct cma_event *event) +{ + WV_CONNECT_ATTRIBUTES attr; + HRESULT hr = 0; + + switch (event->id_priv->state) { + case cma_get_request: + hr = ucma_process_conn_req(event); + break; + case cma_addr_resolve: + event->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + break; + case cma_route_resolve: + event->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + break; + case cma_active_connect: + hr = ucma_process_conn_resp(event); + break; + case cma_accepting: + ucma_process_establish(event); + break; + case cma_connected: + event->event.event = RDMA_CM_EVENT_DISCONNECTED; + event->id_priv->state = cma_passive_disconnect; + break; + case cma_active_disconnect: + event->event.event = RDMA_CM_EVENT_DISCONNECTED; + event->id_priv->state = cma_disconnected; + break; + default: + return -1; + } + + return hr; +} + +__declspec(dllexport) +int rdma_get_cm_event(struct rdma_event_channel *channel, + struct rdma_cm_event **event) +{ + struct cma_event_channel *chan; + struct cma_event *evt; + 
struct cma_id_private *id_priv; + struct rdma_cm_id *id; + DWORD bytes; + HRESULT hr; + + evt = new struct cma_event; + if (evt == NULL) { + return -1; + } + + do { + RtlZeroMemory(evt, sizeof(struct cma_event)); + + chan = CONTAINING_RECORD(channel, struct cma_event_channel, channel); + hr = WaitForMultipleObjects(chan->count, chan->event, FALSE, + chan->channel.timeout); + if (hr == WAIT_TIMEOUT) { + return hr; + } else if (hr == WAIT_FAILED) { + return HRESULT_FROM_WIN32(GetLastError()); + } + + EnterCriticalSection(&chan->lock); + evt->id_priv = chan->id[hr]; + LeaveCriticalSection(&chan->lock); + + id = &evt->id_priv->id; + evt->event.id = id; + evt->event.param.conn.private_data = evt->private_data; + if (id->ep.connect->GetOverlappedResult(&id->overlap, &bytes, FALSE) == 0) { + evt->event.status = HRESULT_FROM_WIN32(GetLastError()); + } + + hr = ucma_process_event(evt); + } while (FAILED(hr)); + + *event = &evt->event; + return 0; +} + + +__declspec(dllexport) +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + void *context) +{ + return WV_NOT_SUPPORTED; +} + +__declspec(dllexport) +int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) +{ + return WV_NOT_SUPPORTED; +} + +__declspec(dllexport) +const char *rdma_event_str(enum rdma_cm_event_type event) +{ + switch (event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + return "RDMA_CM_EVENT_ADDR_RESOLVED"; + case RDMA_CM_EVENT_ADDR_ERROR: + return "RDMA_CM_EVENT_ADDR_ERROR"; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + return "RDMA_CM_EVENT_ROUTE_RESOLVED"; + case RDMA_CM_EVENT_ROUTE_ERROR: + return "RDMA_CM_EVENT_ROUTE_ERROR"; + case RDMA_CM_EVENT_CONNECT_REQUEST: + return "RDMA_CM_EVENT_CONNECT_REQUEST"; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + return "RDMA_CM_EVENT_CONNECT_RESPONSE"; + case RDMA_CM_EVENT_CONNECT_ERROR: + return "RDMA_CM_EVENT_CONNECT_ERROR"; + case RDMA_CM_EVENT_UNREACHABLE: + return "RDMA_CM_EVENT_UNREACHABLE"; + case RDMA_CM_EVENT_REJECTED: + return 
"RDMA_CM_EVENT_REJECTED"; + case RDMA_CM_EVENT_ESTABLISHED: + return "RDMA_CM_EVENT_ESTABLISHED"; + case RDMA_CM_EVENT_DISCONNECTED: + return "RDMA_CM_EVENT_DISCONNECTED"; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + return "RDMA_CM_EVENT_DEVICE_REMOVAL"; + case RDMA_CM_EVENT_MULTICAST_JOIN: + return "RDMA_CM_EVENT_MULTICAST_JOIN"; + case RDMA_CM_EVENT_MULTICAST_ERROR: + return "RDMA_CM_EVENT_MULTICAST_ERROR"; + case RDMA_CM_EVENT_ADDR_CHANGE: + return "RDMA_CM_EVENT_ADDR_CHANGE"; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + return "RDMA_CM_EVENT_TIMEWAIT_EXIT"; + default: + return "UNKNOWN EVENT"; + } +} + +__declspec(dllexport) +int rdma_set_option(struct rdma_cm_id *id, int level, int optname, + void *optval, size_t optlen) +{ + return WV_NOT_SUPPORTED; +} + +__declspec(dllexport) +int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel) +{ + struct cma_id_private *id_priv; + + id_priv = CONTAINING_RECORD(id, struct cma_id_private, id); + cma_event_channel_remove_id(id->channel, id_priv); + /* + * TODO: To support calling this routine while processing events on the old + * channel, we need to wait for all old events to be acknowledged. + */ + id->channel = channel; + cma_event_channel_insert_id(channel, id_priv); + + return 0; +} diff --git a/ulp/librdmacm/src/cma.h b/ulp/librdmacm/src/cma.h new file mode 100644 index 00000000..fb65026b --- /dev/null +++ b/ulp/librdmacm/src/cma.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2008-2009 Intel Corp. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CMA_H
+#define CMA_H
+
+/* Library-wide lock; defined in cma_main.cpp and taken by ucma_init(). */
+extern CRITICAL_SECTION lock;
+
+/*
+ * Allocation operators backed by the process heap.  HeapAlloc() is called
+ * with flags 0 (no zero-fill requested), and callers in cma.cpp compare the
+ * result of new against NULL rather than expecting an exception.
+ */
+__inline void* __cdecl operator new(size_t size)
+{
+	return HeapAlloc(GetProcessHeap(), 0, size);
+}
+
+/* Counterpart of operator new above: returns the block to the process heap. */
+__inline void __cdecl operator delete(void *pObj)
+{
+	HeapFree(GetProcessHeap(), 0, pObj);
+}
+
+#endif /* CMA_H */
diff --git a/ulp/librdmacm/src/cma.rc b/ulp/librdmacm/src/cma.rc
new file mode 100644
index 00000000..fd205458
--- /dev/null
+++ b/ulp/librdmacm/src/cma.rc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2008 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include + +#define VER_FILETYPE VFT_DLL +#define VER_FILESUBTYPE VFT2_UNKNOWN + +#ifdef _DEBUG_ +#define VER_FILEDESCRIPTION_STR "LibRdmaCm (Debug)" +#define VER_INTERNALNAME_STR "librdmacmd.dll" +#define VER_ORIGINALFILENAME_STR "librdmacmd.dll" +#else +#define VER_FILEDESCRIPTION_STR "LibRdmaCm" +#define VER_INTERNALNAME_STR "librdmacm.dll" +#define VER_ORIGINALFILENAME_STR "librdmacm.dll" +#endif + +#include diff --git a/ulp/librdmacm/src/cma_export.def b/ulp/librdmacm/src/cma_export.def new file mode 100644 index 00000000..7a2b1ec4 --- /dev/null +++ b/ulp/librdmacm/src/cma_export.def @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +LIBRARY LIBRDMACM.DLL + +EXPORTS + DllCanUnloadNow PRIVATE + DllGetClassObject PRIVATE diff --git a/ulp/librdmacm/src/cma_exports.src b/ulp/librdmacm/src/cma_exports.src new file mode 100644 index 00000000..853173b3 --- /dev/null +++ b/ulp/librdmacm/src/cma_exports.src @@ -0,0 +1,33 @@ +#if DBG +LIBRARY librdmacmd.dll +#else +LIBRARY librdmacm.dll +#endif + +#ifndef _WIN64 +EXPORTS +rdma_create_event_channel +rdma_destroy_event_channel +rdma_create_id +rdma_destroy_id +rdma_bind_addr +rdma_resolve_addr +rdma_resolve_route +rdma_create_qp +rdma_destroy_qp +rdma_connect +rdma_listen +rdma_accept +rdma_reject +rdma_notify +rdma_disconnect +rdma_join_multicast +rdma_leave_multicast +rdma_get_cm_event +rdma_ack_cm_event +rdma_get_devices +rdma_free_devices +rdma_event_str +rdma_set_option +rdma_migrate_id +#endif diff --git a/ulp/librdmacm/src/cma_main.cpp b/ulp/librdmacm/src/cma_main.cpp new file mode 100644 index 00000000..2daead7a --- /dev/null +++ b/ulp/librdmacm/src/cma_main.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008-2009 Intel Corporation. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include "cma.h"
+
+CRITICAL_SECTION lock;
+
+/*
+ * DLL entry point.  Initializes the library-wide critical section once,
+ * when the DLL is attached to the process.  DllMain is also called for
+ * thread attach/detach and process detach; those notifications must not
+ * re-initialize 'lock', because another thread may be holding it.
+ * Always returns TRUE.
+ */
+BOOLEAN WINAPI DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved)
+{
+	UNREFERENCED_PARAMETER(hInstance);
+	UNREFERENCED_PARAMETER(lpReserved);
+
+	if (dwReason == DLL_PROCESS_ATTACH) {
+		InitializeCriticalSection(&lock);
+	}
+
+	return TRUE;
+}
diff --git a/ulp/librdmacm/src/makefile b/ulp/librdmacm/src/makefile
new file mode 100644
index 00000000..bffacaa7
--- /dev/null
+++ b/ulp/librdmacm/src/makefile
@@ -0,0 +1,7 @@
+#
+# DO NOT EDIT THIS FILE!!!  Edit .\sources. if you want to add a new source
+# file to this component.  This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+
+!INCLUDE ..\..\..\inc\openib.def