librdmacm: provide OFED compatibility library.
[mirror/winof/.git] / ulp / librdmacm / examples / rping / rping.c
1 /*\r
2  * Copyright (c) 2005 Ammasso, Inc. All rights reserved.\r
3  * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.\r
4  * Copyright (c) 2009 Intel Corp.  All rights reserved.\r
5  *\r
6  * This software is available to you under the OpenIB.org BSD license\r
7  * below:\r
8  *\r
9  *     Redistribution and use in source and binary forms, with or\r
10  *     without modification, are permitted provided that the following\r
11  *     conditions are met:\r
12  *\r
13  *      - Redistributions of source code must retain the above\r
14  *        copyright notice, this list of conditions and the following\r
15  *        disclaimer.\r
16  *\r
17  *      - Redistributions in binary form must reproduce the above\r
18  *        copyright notice, this list of conditions and the following\r
19  *        disclaimer in the documentation and/or other materials\r
20  *        provided with the distribution.\r
21  *\r
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
23  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
24  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
25  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
26  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
27  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
28  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
29  * SOFTWARE.\r
30  */\r
31 \r
32 #include <stdio.h>\r
33 #include <stdlib.h>\r
34 #include <string.h>\r
35 #include <ws2tcpip.h>\r
36 #include <winsock2.h>\r
37 #include <time.h>\r
38 \r
39 #include "..\..\..\..\etc\user\getopt.c"\r
40 #include <rdma/rdma_cma.h>\r
41 \r
42 #include <rdma/rdma_cma.h>\r
43 \r
/* Set to non-zero to enable DEBUG_LOG() tracing on stdout. */
static int debug = 0;

/*
 * printf-style trace that is emitted only while `debug` is non-zero.
 *
 * The do { } while (0) wrapper makes the macro expand to exactly one
 * statement, so it cannot capture the `else` of a surrounding unbraced
 * if/else the way a bare `if (debug) printf` expansion would.
 */
#define DEBUG_LOG(...) do { if (debug) printf(__VA_ARGS__); } while (0)
46 \r
47 /*\r
48  * rping "ping/pong" loop:\r
49  *      client sends source rkey/addr/len\r
50  *      server receives source rkey/addr/len\r
51  *      server rdma reads "ping" data from source\r
52  *      server sends "go ahead" on rdma read completion\r
53  *      client sends sink rkey/addr/len\r
54  *      server receives sink rkey/addr/len\r
55  *      server rdma writes "pong" data to sink\r
56  *      server sends "go ahead" on rdma write completion\r
57  *      <repeat loop>\r
58  */\r
59 \r
60 /*\r
61  * These states are used to signal events between the completion handler\r
62  * and the main client or server thread.\r
63  *\r
64  * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, \r
65  * and RDMA_WRITE_COMPLETE for each ping.\r
66  */\r
/*
 * Progress markers written by the CM/CQ handlers and read by the main
 * client or server flow.  The numeric order matters: server_recv() uses
 * `state <= CONNECTED` to recognise the very first advertisement.
 */
enum test_state {
	IDLE                = 1,
	CONNECT_REQUEST     = 2,
	ADDR_RESOLVED       = 3,
	ROUTE_RESOLVED      = 4,
	CONNECTED           = 5,
	RDMA_READ_ADV       = 6,
	RDMA_READ_COMPLETE  = 7,
	RDMA_WRITE_ADV      = 8,
	RDMA_WRITE_COMPLETE = 9,
	RDMA_ERROR          = 10
};
79 \r
/*
 * Buffer advertisement exchanged in every SEND.  rping_format_send()
 * stores the fields in network byte order; server_recv() converts them
 * back with ntohll()/ntohl().
 */
struct rping_rdma_info {
	uint64_t buf;	/* address of the advertised buffer */
	uint32_t rkey;	/* rkey of the memory region covering it */
	uint32_t size;	/* advertised length */
};
85 \r
/* Send-queue depth requested for the QP; the CQ is sized at twice this. */
#define RPING_SQ_DEPTH		16

/* NOTE(review): presumably the smallest accepted ping size; not used in the code visible here. */
#define RPING_MIN_BUFSIZE	16

/* printf format of the text prefix placed at the start of each ping buffer. */
#define RPING_MSG_FMT		"rdma-ping-%d: "
90 \r
91 /*\r
92  * Control block struct.\r
93  */\r
94 struct rping_cb {\r
95         int server;                     /* 0 iff client */\r
96         pthread_t cqthread;\r
97         struct ibv_comp_channel *channel;\r
98         struct ibv_cq *cq;\r
99         struct ibv_pd *pd;\r
100         struct ibv_qp *qp;\r
101 \r
102         struct ibv_recv_wr rq_wr;       /* recv work request record */\r
103         struct ibv_sge recv_sgl;        /* recv single SGE */\r
104         struct rping_rdma_info recv_buf;/* malloc'd buffer */\r
105         struct ibv_mr *recv_mr;         /* MR associated with this buffer */\r
106 \r
107         struct ibv_send_wr sq_wr;       /* send work request record */\r
108         struct ibv_sge send_sgl;\r
109         struct rping_rdma_info send_buf;/* single send buf */\r
110         struct ibv_mr *send_mr;\r
111 \r
112         struct ibv_send_wr rdma_sq_wr;  /* rdma work request record */\r
113         struct ibv_sge rdma_sgl;        /* rdma single SGE */\r
114         char *rdma_buf;                 /* used as rdma sink */\r
115         struct ibv_mr *rdma_mr;\r
116 \r
117         uint32_t remote_rkey;           /* remote guys RKEY */\r
118         uint64_t remote_addr;           /* remote guys TO */\r
119         uint32_t remote_len;            /* remote guys LEN */\r
120 \r
121         char *start_buf;                /* rdma read src */\r
122         struct ibv_mr *start_mr;\r
123 \r
124         enum test_state state;          /* used for cond/signalling */\r
125 //      sem_t sem;\r
126 \r
127         struct sockaddr_in sin;\r
128         uint16_t port;                  /* dst port in NBO */\r
129         int verbose;                    /* verbose logging */\r
130         int count;                      /* ping count */\r
131         int size;                       /* ping data size */\r
132         int validate;                   /* validate ping data */\r
133 \r
134         /* CM stuff */\r
135 //      pthread_t cmthread;\r
136         struct rdma_event_channel *cm_channel;\r
137         struct rdma_cm_id *cm_id;       /* connection on client side,*/\r
138                                         /* listener on service side. */\r
139         struct rdma_cm_id *child_cm_id; /* connection on server side */\r
140 };\r
141 \r
/* Control block pointer with external linkage. */
struct rping_cb *cb;

/*
 * Forward declarations of the two event pumps.  Their definitions return
 * void, so the prototypes must as well; declaring them as returning
 * `void *` is a conflicting-types error.
 */
static void cm_thread(void *arg);
static void cq_thread(void *arg);
145 \r
146 static int rping_cma_event_handler(struct rdma_cm_id *cma_id,\r
147                                     struct rdma_cm_event *event)\r
148 {\r
149         int ret = 0;\r
150         struct rping_cb *cb = cma_id->context;\r
151 \r
152         DEBUG_LOG("cma_event type %s cma_id %p (%s)\n",\r
153                   rdma_event_str(event->event), cma_id,\r
154                   (cma_id == cb->cm_id) ? "parent" : "child");\r
155 \r
156         switch (event->event) {\r
157         case RDMA_CM_EVENT_ADDR_RESOLVED:\r
158                 cb->state = ADDR_RESOLVED;\r
159                 ret = rdma_resolve_route(cma_id, 2000);\r
160                 if (ret) {\r
161                         cb->state = RDMA_ERROR;\r
162                         fprintf(stderr, "rdma_resolve_route error %d\n", ret);\r
163 //                      sem_post(&cb->sem);\r
164                 }\r
165                 break;\r
166 \r
167         case RDMA_CM_EVENT_ROUTE_RESOLVED:\r
168                 cb->state = ROUTE_RESOLVED;\r
169 //              sem_post(&cb->sem);\r
170                 break;\r
171 \r
172         case RDMA_CM_EVENT_CONNECT_REQUEST:\r
173                 cb->state = CONNECT_REQUEST;\r
174                 cb->child_cm_id = cma_id;\r
175                 DEBUG_LOG("child cma %p\n", cb->child_cm_id);\r
176 //              sem_post(&cb->sem);\r
177                 break;\r
178 \r
179         case RDMA_CM_EVENT_ESTABLISHED:\r
180                 DEBUG_LOG("ESTABLISHED\n");\r
181 \r
182                 /*\r
183                  * Server will wake up when first RECV completes.\r
184                  */\r
185                 if (!cb->server) {\r
186                         cb->state = CONNECTED;\r
187                 }\r
188 //              sem_post(&cb->sem);\r
189                 break;\r
190 \r
191         case RDMA_CM_EVENT_ADDR_ERROR:\r
192         case RDMA_CM_EVENT_ROUTE_ERROR:\r
193         case RDMA_CM_EVENT_CONNECT_ERROR:\r
194         case RDMA_CM_EVENT_UNREACHABLE:\r
195         case RDMA_CM_EVENT_REJECTED:\r
196                 fprintf(stderr, "cma event %s, error %d\n",\r
197                         rdma_event_str(event->event), event->status);\r
198 //              sem_post(&cb->sem);\r
199                 ret = -1;\r
200                 break;\r
201 \r
202         case RDMA_CM_EVENT_DISCONNECTED:\r
203                 fprintf(stderr, "%s DISCONNECT EVENT...\n",\r
204                         cb->server ? "server" : "client");\r
205 //              sem_post(&cb->sem);\r
206                 break;\r
207 \r
208         case RDMA_CM_EVENT_DEVICE_REMOVAL:\r
209                 fprintf(stderr, "cma detected device removal!!!!\n");\r
210                 ret = -1;\r
211                 break;\r
212 \r
213         default:\r
214                 fprintf(stderr, "unhandled event: %s, ignoring\n",\r
215                         rdma_event_str(event->event));\r
216                 break;\r
217         }\r
218 \r
219         return ret;\r
220 }\r
221 \r
222 static int server_recv(struct rping_cb *cb, struct ibv_wc *wc)\r
223 {\r
224         if (wc->byte_len != sizeof(cb->recv_buf)) {\r
225                 fprintf(stderr, "Received bogus data, size %d\n", wc->byte_len);\r
226                 return -1;\r
227         }\r
228 \r
229         cb->remote_rkey = ntohl(cb->recv_buf.rkey);\r
230         cb->remote_addr = ntohll(cb->recv_buf.buf);\r
231         cb->remote_len  = ntohl(cb->recv_buf.size);\r
232         DEBUG_LOG("Received rkey %x addr %" PRIx64 " len %d from peer\n",\r
233                   cb->remote_rkey, cb->remote_addr, cb->remote_len);\r
234 \r
235         if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE)\r
236                 cb->state = RDMA_READ_ADV;\r
237         else\r
238                 cb->state = RDMA_WRITE_ADV;\r
239 \r
240         return 0;\r
241 }\r
242 \r
243 static int client_recv(struct rping_cb *cb, struct ibv_wc *wc)\r
244 {\r
245         if (wc->byte_len != sizeof(cb->recv_buf)) {\r
246                 fprintf(stderr, "Received bogus data, size %d\n", wc->byte_len);\r
247                 return -1;\r
248         }\r
249 \r
250         if (cb->state == RDMA_READ_ADV)\r
251                 cb->state = RDMA_WRITE_ADV;\r
252         else\r
253                 cb->state = RDMA_WRITE_COMPLETE;\r
254 \r
255         return 0;\r
256 }\r
257 \r
258 static int rping_cq_event_handler(struct rping_cb *cb)\r
259 {\r
260         struct ibv_wc wc;\r
261         struct ibv_recv_wr *bad_wr;\r
262         int ret;\r
263 \r
264         while ((ret = ibv_poll_cq(cb->cq, 1, &wc)) == 1) {\r
265                 ret = 0;\r
266 \r
267                 if (wc.status) {\r
268                         fprintf(stderr, "cq completion failed status %d\n",\r
269                                 wc.status);\r
270                         if (wc.status != IBV_WC_WR_FLUSH_ERR)\r
271                                 ret = -1;\r
272                         goto error;\r
273                 }\r
274 \r
275                 switch (wc.opcode) {\r
276                 case IBV_WC_SEND:\r
277                         DEBUG_LOG("send completion\n");\r
278                         break;\r
279 \r
280                 case IBV_WC_RDMA_WRITE:\r
281                         DEBUG_LOG("rdma write completion\n");\r
282                         cb->state = RDMA_WRITE_COMPLETE;\r
283 //                      sem_post(&cb->sem);\r
284                         break;\r
285 \r
286                 case IBV_WC_RDMA_READ:\r
287                         DEBUG_LOG("rdma read completion\n");\r
288                         cb->state = RDMA_READ_COMPLETE;\r
289 //                      sem_post(&cb->sem);\r
290                         break;\r
291 \r
292                 case IBV_WC_RECV:\r
293                         DEBUG_LOG("recv completion\n");\r
294                         ret = cb->server ? server_recv(cb, &wc) :\r
295                                            client_recv(cb, &wc);\r
296                         if (ret) {\r
297                                 fprintf(stderr, "recv wc error: %d\n", ret);\r
298                                 goto error;\r
299                         }\r
300 \r
301                         ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);\r
302                         if (ret) {\r
303                                 fprintf(stderr, "post recv error: %d\n", ret);\r
304                                 goto error;\r
305                         }\r
306 //                      sem_post(&cb->sem);\r
307                         break;\r
308 \r
309                 default:\r
310                         DEBUG_LOG("unknown!!!!! completion\n");\r
311                         ret = -1;\r
312                         goto error;\r
313                 }\r
314         }\r
315         if (ret) {\r
316                 fprintf(stderr, "poll error %d\n", ret);\r
317                 goto error;\r
318         }\r
319         return 0;\r
320 \r
321 error:\r
322         cb->state = RDMA_ERROR;\r
323 //      sem_post(&cb->sem);\r
324         return ret;\r
325 }\r
326 \r
327 static int rping_accept(struct rping_cb *cb)\r
328 {\r
329         struct rdma_conn_param conn_param;\r
330         int ret;\r
331 \r
332         DEBUG_LOG("accepting client connection request\n");\r
333 \r
334         memset(&conn_param, 0, sizeof conn_param);\r
335         conn_param.responder_resources = 1;\r
336         conn_param.initiator_depth = 1;\r
337 \r
338         ret = rdma_accept(cb->child_cm_id, &conn_param);\r
339         if (ret) {\r
340                 fprintf(stderr, "rdma_accept error: %d\n", ret);\r
341                 return ret;\r
342         }\r
343 \r
344         cm_thread(cb);\r
345 //      sem_wait(&cb->sem);\r
346         if (cb->state == RDMA_ERROR) {\r
347                 fprintf(stderr, "wait for CONNECTED state %d\n", cb->state);\r
348                 return -1;\r
349         }\r
350         return 0;\r
351 }\r
352 \r
353 static void rping_setup_wr(struct rping_cb *cb)\r
354 {\r
355         cb->recv_sgl.addr = (uint64_t) (unsigned long) &cb->recv_buf;\r
356         cb->recv_sgl.length = sizeof cb->recv_buf;\r
357         cb->recv_sgl.lkey = cb->recv_mr->lkey;\r
358         cb->rq_wr.sg_list = &cb->recv_sgl;\r
359         cb->rq_wr.num_sge = 1;\r
360 \r
361         cb->send_sgl.addr = (uint64_t) (unsigned long) &cb->send_buf;\r
362         cb->send_sgl.length = sizeof cb->send_buf;\r
363         cb->send_sgl.lkey = cb->send_mr->lkey;\r
364 \r
365         cb->sq_wr.opcode = IBV_WR_SEND;\r
366         cb->sq_wr.send_flags = IBV_SEND_SIGNALED;\r
367         cb->sq_wr.sg_list = &cb->send_sgl;\r
368         cb->sq_wr.num_sge = 1;\r
369 \r
370         cb->rdma_sgl.addr = (uint64_t) (unsigned long) cb->rdma_buf;\r
371         cb->rdma_sgl.lkey = cb->rdma_mr->lkey;\r
372         cb->rdma_sq_wr.send_flags = IBV_SEND_SIGNALED;\r
373         cb->rdma_sq_wr.sg_list = &cb->rdma_sgl;\r
374         cb->rdma_sq_wr.num_sge = 1;\r
375 }\r
376 \r
377 static int rping_setup_buffers(struct rping_cb *cb)\r
378 {\r
379         int ret;\r
380 \r
381         DEBUG_LOG("rping_setup_buffers called on cb %p\n", cb);\r
382 \r
383         cb->recv_mr = ibv_reg_mr(cb->pd, &cb->recv_buf, sizeof cb->recv_buf,\r
384                                  IBV_ACCESS_LOCAL_WRITE);\r
385         if (!cb->recv_mr) {\r
386                 fprintf(stderr, "recv_buf reg_mr failed\n");\r
387                 return errno;\r
388         }\r
389 \r
390         cb->send_mr = ibv_reg_mr(cb->pd, &cb->send_buf, sizeof cb->send_buf, 0);\r
391         if (!cb->send_mr) {\r
392                 fprintf(stderr, "send_buf reg_mr failed\n");\r
393                 ret = errno;\r
394                 goto err1;\r
395         }\r
396 \r
397         cb->rdma_buf = malloc(cb->size);\r
398         if (!cb->rdma_buf) {\r
399                 fprintf(stderr, "rdma_buf malloc failed\n");\r
400                 ret = -ENOMEM;\r
401                 goto err2;\r
402         }\r
403 \r
404         cb->rdma_mr = ibv_reg_mr(cb->pd, cb->rdma_buf, cb->size,\r
405                                  IBV_ACCESS_LOCAL_WRITE |\r
406                                  IBV_ACCESS_REMOTE_READ |\r
407                                  IBV_ACCESS_REMOTE_WRITE);\r
408         if (!cb->rdma_mr) {\r
409                 fprintf(stderr, "rdma_buf reg_mr failed\n");\r
410                 ret = errno;\r
411                 goto err3;\r
412         }\r
413 \r
414         if (!cb->server) {\r
415                 cb->start_buf = malloc(cb->size);\r
416                 if (!cb->start_buf) {\r
417                         fprintf(stderr, "start_buf malloc failed\n");\r
418                         ret = -ENOMEM;\r
419                         goto err4;\r
420                 }\r
421 \r
422                 cb->start_mr = ibv_reg_mr(cb->pd, cb->start_buf, cb->size,\r
423                                           IBV_ACCESS_LOCAL_WRITE | \r
424                                           IBV_ACCESS_REMOTE_READ |\r
425                                           IBV_ACCESS_REMOTE_WRITE);\r
426                 if (!cb->start_mr) {\r
427                         fprintf(stderr, "start_buf reg_mr failed\n");\r
428                         ret = errno;\r
429                         goto err5;\r
430                 }\r
431         }\r
432 \r
433         rping_setup_wr(cb);\r
434         DEBUG_LOG("allocated & registered buffers...\n");\r
435         return 0;\r
436 \r
437 err5:\r
438         free(cb->start_buf);\r
439 err4:\r
440         ibv_dereg_mr(cb->rdma_mr);\r
441 err3:\r
442         free(cb->rdma_buf);\r
443 err2:\r
444         ibv_dereg_mr(cb->send_mr);\r
445 err1:\r
446         ibv_dereg_mr(cb->recv_mr);\r
447         return ret;\r
448 }\r
449 \r
450 static void rping_free_buffers(struct rping_cb *cb)\r
451 {\r
452         DEBUG_LOG("rping_free_buffers called on cb %p\n", cb);\r
453         ibv_dereg_mr(cb->recv_mr);\r
454         ibv_dereg_mr(cb->send_mr);\r
455         ibv_dereg_mr(cb->rdma_mr);\r
456         free(cb->rdma_buf);\r
457         if (!cb->server) {\r
458                 ibv_dereg_mr(cb->start_mr);\r
459                 free(cb->start_buf);\r
460         }\r
461 }\r
462 \r
463 static int rping_create_qp(struct rping_cb *cb)\r
464 {\r
465         struct ibv_qp_init_attr init_attr;\r
466         int ret;\r
467 \r
468         memset(&init_attr, 0, sizeof(init_attr));\r
469         init_attr.cap.max_send_wr = RPING_SQ_DEPTH;\r
470         init_attr.cap.max_recv_wr = 2;\r
471         init_attr.cap.max_recv_sge = 1;\r
472         init_attr.cap.max_send_sge = 1;\r
473         init_attr.qp_type = IBV_QPT_RC;\r
474         init_attr.send_cq = cb->cq;\r
475         init_attr.recv_cq = cb->cq;\r
476 \r
477         if (cb->server) {\r
478                 ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr);\r
479                 if (!ret)\r
480                         cb->qp = cb->child_cm_id->qp;\r
481         } else {\r
482                 ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr);\r
483                 if (!ret)\r
484                         cb->qp = cb->cm_id->qp;\r
485         }\r
486 \r
487         return ret;\r
488 }\r
489 \r
490 static void rping_free_qp(struct rping_cb *cb)\r
491 {\r
492         ibv_destroy_qp(cb->qp);\r
493         ibv_destroy_cq(cb->cq);\r
494         ibv_destroy_comp_channel(cb->channel);\r
495         ibv_dealloc_pd(cb->pd);\r
496 }\r
497 \r
498 static int rping_setup_qp(struct rping_cb *cb, struct rdma_cm_id *cm_id)\r
499 {\r
500         int ret;\r
501 \r
502         cb->pd = ibv_alloc_pd(cm_id->verbs);\r
503         if (!cb->pd) {\r
504                 fprintf(stderr, "ibv_alloc_pd failed\n");\r
505                 return errno;\r
506         }\r
507         DEBUG_LOG("created pd %p\n", cb->pd);\r
508 \r
509         cb->channel = ibv_create_comp_channel(cm_id->verbs);\r
510         if (!cb->channel) {\r
511                 fprintf(stderr, "ibv_create_comp_channel failed\n");\r
512                 ret = errno;\r
513                 goto err1;\r
514         }\r
515         DEBUG_LOG("created channel %p\n", cb->channel);\r
516 \r
517         cb->cq = ibv_create_cq(cm_id->verbs, RPING_SQ_DEPTH * 2, cb,\r
518                                 cb->channel, 0);\r
519         if (!cb->cq) {\r
520                 fprintf(stderr, "ibv_create_cq failed\n");\r
521                 ret = errno;\r
522                 goto err2;\r
523         }\r
524         DEBUG_LOG("created cq %p\n", cb->cq);\r
525 \r
526         ret = ibv_req_notify_cq(cb->cq, 0);\r
527         if (ret) {\r
528                 fprintf(stderr, "ibv_create_cq failed\n");\r
529                 ret = errno;\r
530                 goto err3;\r
531         }\r
532 \r
533         ret = rping_create_qp(cb);\r
534         if (ret) {\r
535                 fprintf(stderr, "rping_create_qp failed: %d\n", ret);\r
536                 goto err3;\r
537         }\r
538         DEBUG_LOG("created qp %p\n", cb->qp);\r
539         return 0;\r
540 \r
541 err3:\r
542         ibv_destroy_cq(cb->cq);\r
543 err2:\r
544         ibv_destroy_comp_channel(cb->channel);\r
545 err1:\r
546         ibv_dealloc_pd(cb->pd);\r
547         return ret;\r
548 }\r
549 \r
550 static void cm_thread(void *arg)\r
551 {\r
552         struct rping_cb *cb = arg;\r
553         struct rdma_cm_event *event;\r
554         int ret;\r
555 \r
556 //      while (1) {\r
557         ret = rdma_get_cm_event(cb->cm_channel, &event);\r
558         if (ret) {\r
559                 fprintf(stderr, "rdma_get_cm_event err %d\n", ret);\r
560                 return;\r
561 //                      exit(ret);\r
562         }\r
563         ret = rping_cma_event_handler(event->id, event);\r
564         rdma_ack_cm_event(event);\r
565 //              if (ret)\r
566 //                      exit(ret);\r
567 //      }\r
568 }\r
569 \r
570 static void cq_thread(void *arg)\r
571 {\r
572         struct rping_cb *cb = arg;\r
573         struct ibv_cq *ev_cq;\r
574         void *ev_ctx;\r
575         int ret;\r
576         \r
577 //      DEBUG_LOG("cq_thread started.\n");\r
578 \r
579 //      while (1) {     \r
580 //              pthread_testcancel();\r
581 \r
582         ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);\r
583         if (ret) {\r
584                 fprintf(stderr, "Failed to get cq event!\n");\r
585                 return;\r
586 //              pthread_exit(NULL);\r
587         }\r
588         if (ev_cq != cb->cq) {\r
589                 fprintf(stderr, "Unknown CQ!\n");\r
590                 return;\r
591 //              pthread_exit(NULL);\r
592         }\r
593         ret = ibv_req_notify_cq(cb->cq, 0);\r
594         if (ret) {\r
595                 fprintf(stderr, "Failed to set notify!\n");\r
596                 return;\r
597 //              pthread_exit(NULL);\r
598         }\r
599         ret = rping_cq_event_handler(cb);\r
600         ibv_ack_cq_events(cb->cq, 1);\r
601 //              pthread_exit(NULL);\r
602 //      }\r
603 }\r
604 \r
605 static void rping_format_send(struct rping_cb *cb, char *buf, struct ibv_mr *mr)\r
606 {\r
607         struct rping_rdma_info *info = &cb->send_buf;\r
608 \r
609         info->buf = htonll((uint64_t) (unsigned long) buf);\r
610         info->rkey = htonl(mr->rkey);\r
611         info->size = htonl(cb->size);\r
612 \r
613         DEBUG_LOG("RDMA addr %" PRIx64" rkey %x len %d\n",\r
614                   ntohll(info->buf), ntohl(info->rkey), ntohl(info->size));\r
615 }\r
616 \r
617 static int rping_test_server(struct rping_cb *cb)\r
618 {\r
619         struct ibv_send_wr *bad_wr;\r
620         int ret;\r
621 \r
622         while (1) {\r
623                 /* Wait for client's Start STAG/TO/Len */\r
624                 cq_thread(cb);\r
625 //              sem_wait(&cb->sem);\r
626                 if (cb->state != RDMA_READ_ADV) {\r
627                         fprintf(stderr, "wait for RDMA_READ_ADV state %d\n",\r
628                                 cb->state);\r
629                         ret = -1;\r
630                         break;\r
631                 }\r
632 \r
633                 DEBUG_LOG("server received sink adv\n");\r
634 \r
635                 /* Issue RDMA Read. */\r
636                 cb->rdma_sq_wr.opcode = IBV_WR_RDMA_READ;\r
637                 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;\r
638                 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;\r
639                 cb->rdma_sq_wr.sg_list->length = cb->remote_len;\r
640 \r
641                 ret = ibv_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);\r
642                 if (ret) {\r
643                         fprintf(stderr, "post send error %d\n", ret);\r
644                         break;\r
645                 }\r
646                 DEBUG_LOG("server posted rdma read req \n");\r
647 \r
648                 /* Wait for read completion */\r
649                 cq_thread(cb);\r
650 //              sem_wait(&cb->sem);\r
651                 if (cb->state != RDMA_READ_COMPLETE) {\r
652                         fprintf(stderr, "wait for RDMA_READ_COMPLETE state %d\n",\r
653                                 cb->state);\r
654                         ret = -1;\r
655                         break;\r
656                 }\r
657                 DEBUG_LOG("server received read complete\n");\r
658 \r
659                 /* Display data in recv buf */\r
660                 if (cb->verbose)\r
661                         printf("server ping data: %s\n", cb->rdma_buf);\r
662 \r
663                 /* Tell client to continue */\r
664                 ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
665                 if (ret) {\r
666                         fprintf(stderr, "post send error %d\n", ret);\r
667                         break;\r
668                 }\r
669                 DEBUG_LOG("server posted go ahead\n");\r
670 \r
671                 /* Wait for client's RDMA STAG/TO/Len */\r
672                 cq_thread(cb);\r
673 //              sem_wait(&cb->sem);\r
674                 if (cb->state != RDMA_WRITE_ADV) {\r
675                         fprintf(stderr, "wait for RDMA_WRITE_ADV state %d\n",\r
676                                 cb->state);\r
677                         ret = -1;\r
678                         break;\r
679                 }\r
680                 DEBUG_LOG("server received sink adv\n");\r
681 \r
682                 /* RDMA Write echo data */\r
683                 cb->rdma_sq_wr.opcode = IBV_WR_RDMA_WRITE;\r
684                 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;\r
685                 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;\r
686                 cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1;\r
687                 DEBUG_LOG("rdma write from lkey %x laddr %" PRIx64 " len %d\n",\r
688                           cb->rdma_sq_wr.sg_list->lkey,\r
689                           cb->rdma_sq_wr.sg_list->addr,\r
690                           cb->rdma_sq_wr.sg_list->length);\r
691 \r
692                 ret = ibv_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);\r
693                 if (ret) {\r
694                         fprintf(stderr, "post send error %d\n", ret);\r
695                         break;\r
696                 }\r
697 \r
698                 /* Wait for completion */\r
699                 cq_thread(cb);\r
700 //              ret = sem_wait(&cb->sem);\r
701                 if (cb->state != RDMA_WRITE_COMPLETE) {\r
702                         fprintf(stderr, "wait for RDMA_WRITE_COMPLETE state %d\n",\r
703                                 cb->state);\r
704                         ret = -1;\r
705                         break;\r
706                 }\r
707                 DEBUG_LOG("server rdma write complete \n");\r
708 \r
709                 /* Tell client to begin again */\r
710                 ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
711                 if (ret) {\r
712                         fprintf(stderr, "post send error %d\n", ret);\r
713                         break;\r
714                 }\r
715                 DEBUG_LOG("server posted go ahead\n");\r
716         }\r
717 \r
718         return ret;\r
719 }\r
720 \r
721 static int rping_bind_server(struct rping_cb *cb)\r
722 {\r
723         int ret;\r
724 \r
725         cb->sin.sin_port = cb->port;\r
726         ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &cb->sin);\r
727         if (ret) {\r
728                 fprintf(stderr, "rdma_bind_addr error %d\n", ret);\r
729                 return ret;\r
730         }\r
731         DEBUG_LOG("rdma_bind_addr successful\n");\r
732 \r
733         DEBUG_LOG("rdma_listen\n");\r
734         ret = rdma_listen(cb->cm_id, 3);\r
735         if (ret) {\r
736                 fprintf(stderr, "rdma_listen failed: %d\n", ret);\r
737                 return ret;\r
738         }\r
739 \r
740         return 0;\r
741 }\r
742 \r
743 static struct rping_cb *clone_cb(struct rping_cb *listening_cb)\r
744 {\r
745         struct rping_cb *cb = malloc(sizeof *cb);\r
746         if (!cb)\r
747                 return NULL;\r
748         *cb = *listening_cb;\r
749         cb->child_cm_id->context = cb;\r
750         return cb;\r
751 }\r
752 \r
/* Release a control block that was allocated on the heap. */
static void free_cb(struct rping_cb *cb)
{
	void *mem = cb;

	free(mem);
}
757 \r
758 static int rping_run_server(struct rping_cb *cb)\r
759 {\r
760         struct ibv_recv_wr *bad_wr;\r
761         int ret;\r
762 \r
763         ret = rping_bind_server(cb);\r
764         if (ret)\r
765                 return ret;\r
766 \r
767         cm_thread(cb);\r
768 //      sem_wait(&cb->sem);\r
769         if (cb->state != CONNECT_REQUEST) {\r
770                 fprintf(stderr, "wait for CONNECT_REQUEST state %d\n",\r
771                         cb->state);\r
772                 return -1;\r
773         }\r
774 \r
775         ret = rping_setup_qp(cb, cb->child_cm_id);\r
776         if (ret) {\r
777                 fprintf(stderr, "setup_qp failed: %d\n", ret);\r
778                 return ret;\r
779         }\r
780 \r
781         ret = rping_setup_buffers(cb);\r
782         if (ret) {\r
783                 fprintf(stderr, "rping_setup_buffers failed: %d\n", ret);\r
784                 goto err1;\r
785         }\r
786 \r
787         ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);\r
788         if (ret) {\r
789                 fprintf(stderr, "ibv_post_recv failed: %d\n", ret);\r
790                 goto err2;\r
791         }\r
792 \r
793 //      pthread_create(&cb->cqthread, NULL, cq_thread, cb);\r
794 \r
795         ret = rping_accept(cb);\r
796         if (ret) {\r
797                 fprintf(stderr, "connect error %d\n", ret);\r
798                 goto err2;\r
799         }\r
800 \r
801         rping_test_server(cb);\r
802         rdma_disconnect(cb->child_cm_id);\r
803         rdma_destroy_id(cb->child_cm_id);\r
804 err2:\r
805         rping_free_buffers(cb);\r
806 err1:\r
807         rping_free_qp(cb);\r
808 \r
809         return ret;\r
810 }\r
811 \r
812 static int rping_test_client(struct rping_cb *cb)\r
813 {\r
814         int ping, start, cc, i, ret = 0;\r
815         struct ibv_send_wr *bad_wr;\r
816         unsigned char c;\r
817 \r
818         start = 65;\r
819         for (ping = 0; !cb->count || ping < cb->count; ping++) {\r
820                 cb->state = RDMA_READ_ADV;\r
821 \r
822                 /* Put some ascii text in the buffer. */\r
823                 cc = sprintf(cb->start_buf, RPING_MSG_FMT, ping);\r
824                 for (i = cc, c = start; i < cb->size; i++) {\r
825                         cb->start_buf[i] = c;\r
826                         c++;\r
827                         if (c > 122)\r
828                                 c = 65;\r
829                 }\r
830                 start++;\r
831                 if (start > 122)\r
832                         start = 65;\r
833                 cb->start_buf[cb->size - 1] = 0;\r
834 \r
835                 rping_format_send(cb, cb->start_buf, cb->start_mr);\r
836                 ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
837                 if (ret) {\r
838                         fprintf(stderr, "post send error %d\n", ret);\r
839                         break;\r
840                 }\r
841 \r
842                 /* Wait for server to ACK */\r
843                 cq_thread(cb);\r
844 //              sem_wait(&cb->sem);\r
845                 if (cb->state != RDMA_WRITE_ADV) {\r
846                         fprintf(stderr, "wait for RDMA_WRITE_ADV state %d\n",\r
847                                 cb->state);\r
848                         ret = -1;\r
849                         break;\r
850                 }\r
851 \r
852                 rping_format_send(cb, cb->rdma_buf, cb->rdma_mr);\r
853                 ret = ibv_post_send(cb->qp, &cb->sq_wr, &bad_wr);\r
854                 if (ret) {\r
855                         fprintf(stderr, "post send error %d\n", ret);\r
856                         break;\r
857                 }\r
858 \r
859                 /* Wait for the server to say the RDMA Write is complete. */\r
860                 cq_thread(cb);\r
861 //              sem_wait(&cb->sem);\r
862                 if (cb->state != RDMA_WRITE_COMPLETE) {\r
863                         fprintf(stderr, "wait for RDMA_WRITE_COMPLETE state %d\n",\r
864                                 cb->state);\r
865                         ret = -1;\r
866                         break;\r
867                 }\r
868 \r
869                 if (cb->validate)\r
870                         if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {\r
871                                 fprintf(stderr, "data mismatch!\n");\r
872                                 ret = -1;\r
873                                 break;\r
874                         }\r
875 \r
876                 if (cb->verbose)\r
877                         printf("ping data: %s\n", cb->rdma_buf);\r
878         }\r
879 \r
880         return ret;\r
881 }\r
882 \r
883 static int rping_connect_client(struct rping_cb *cb)\r
884 {\r
885         struct rdma_conn_param conn_param;\r
886         int ret;\r
887 \r
888         memset(&conn_param, 0, sizeof conn_param);\r
889         conn_param.responder_resources = 1;\r
890         conn_param.initiator_depth = 1;\r
891         conn_param.retry_count = 10;\r
892 \r
893         ret = rdma_connect(cb->cm_id, &conn_param);\r
894         if (ret) {\r
895                 fprintf(stderr, "rdma_connect error %d\n", ret);\r
896                 return ret;\r
897         }\r
898 \r
899         cm_thread(cb);\r
900 //      sem_wait(&cb->sem);\r
901         if (cb->state != CONNECTED) {\r
902                 fprintf(stderr, "wait for CONNECTED state %d\n", cb->state);\r
903                 return -1;\r
904         }\r
905 \r
906         DEBUG_LOG("rmda_connect successful\n");\r
907         return 0;\r
908 }\r
909 \r
910 static int rping_bind_client(struct rping_cb *cb)\r
911 {\r
912         int ret;\r
913 \r
914         cb->sin.sin_port = cb->port;\r
915         ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &cb->sin, 2000);\r
916         if (ret) {\r
917                 fprintf(stderr, "rdma_resolve_addr error %d\n", ret);\r
918                 return ret;\r
919         }\r
920 \r
921         cm_thread(cb);\r
922 //      sem_wait(&cb->sem);\r
923         if (cb->state != ROUTE_RESOLVED) {\r
924                 fprintf(stderr, "waiting for addr/route resolution state %d\n",\r
925                         cb->state);\r
926                 return -1;\r
927         }\r
928 \r
929         DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n");\r
930         return 0;\r
931 }\r
932 \r
933 static int rping_run_client(struct rping_cb *cb)\r
934 {\r
935         struct ibv_recv_wr *bad_wr;\r
936         int ret;\r
937 \r
938         ret = rping_bind_client(cb);\r
939         if (ret)\r
940                 return ret;\r
941 \r
942         ret = rping_setup_qp(cb, cb->cm_id);\r
943         if (ret) {\r
944                 fprintf(stderr, "setup_qp failed: %d\n", ret);\r
945                 return ret;\r
946         }\r
947 \r
948         ret = rping_setup_buffers(cb);\r
949         if (ret) {\r
950                 fprintf(stderr, "rping_setup_buffers failed: %d\n", ret);\r
951                 goto err1;\r
952         }\r
953 \r
954         ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);\r
955         if (ret) {\r
956                 fprintf(stderr, "ibv_post_recv failed: %d\n", ret);\r
957                 goto err2;\r
958         }\r
959 \r
960 //      pthread_create(&cb->cqthread, NULL, cq_thread, cb);\r
961 \r
962         ret = rping_connect_client(cb);\r
963         if (ret) {\r
964                 fprintf(stderr, "connect error %d\n", ret);\r
965                 goto err2;\r
966         }\r
967 \r
968         rping_test_client(cb);\r
969         rdma_disconnect(cb->cm_id);\r
970 err2:\r
971         rping_free_buffers(cb);\r
972 err1:\r
973         rping_free_qp(cb);\r
974 \r
975         return ret;\r
976 }\r
977 \r
/*
 * Resolve a host name or numeric address string to an IPv4 socket address.
 *
 * dst:  host name or address string handed to getaddrinfo().
 * addr: receives a copy of the first result's sockaddr_in on success.
 *
 * Returns 0 on success, the getaddrinfo() error code if resolution failed,
 * or -1 if the first result is not an IPv4 address.
 *
 * The lookup is restricted to AF_INET through the hints argument.  Without
 * it, a dual-stack host whose first result is IPv6 was rejected even though
 * an IPv4 address was available further down the list.
 */
static int get_addr(char *dst, struct sockaddr_in *addr)
{
	struct addrinfo hints;
	struct addrinfo *res;
	int ret;

	memset(&hints, 0, sizeof hints);
	hints.ai_family = AF_INET;

	ret = getaddrinfo(dst, NULL, &hints, &res);
	if (ret) {
		printf("getaddrinfo failed - invalid hostname or IP address\n");
		return ret;
	}

	/* Defensive: the hints should already guarantee an IPv4 result. */
	if (res->ai_family != AF_INET) {
		ret = -1;
		goto out;
	}

	*addr = *(struct sockaddr_in *) res->ai_addr;
out:
	freeaddrinfo(res);
	return ret;
}
999 \r
/*
 * Print the command-line synopsis and the option summary to stdout.
 */
static void usage()
{
	static const char *const help_lines[] = {
		"rdma_rping -s [-vVd] [-S size] [-C count] [-a addr] [-p port]\n",
		"rdma_rping -c [-vVd] [-S size] [-C count] -a addr [-p port]\n",
		"\t-c\t\tclient side\n",
		"\t-s\t\tserver side\n",
		"\t-v\t\tdisplay ping data to stdout\n",
		"\t-V\t\tvalidate ping data\n",
		"\t-d\t\tdebug printfs\n",
		"\t-S size \tping data size\n",
		"\t-C count\tping count times\n",
		"\t-a addr\t\taddress\n",
		"\t-p port\t\tport\n",
	};
	size_t idx;

	for (idx = 0; idx < sizeof help_lines / sizeof help_lines[0]; idx++)
		printf("%s", help_lines[idx]);
}
1014 \r
1015 int main(int argc, char *argv[])\r
1016 {\r
1017 //      struct rping_cb *cb;\r
1018         int op;\r
1019         int ret = 0;\r
1020 \r
1021         cb = malloc(sizeof(*cb));\r
1022         if (!cb)\r
1023                 return -ENOMEM;\r
1024 \r
1025         memset(cb, 0, sizeof(*cb));\r
1026         cb->server = -1;\r
1027         cb->state = IDLE;\r
1028         cb->size = 64;\r
1029         cb->sin.sin_family = PF_INET;\r
1030         cb->port = htons(7174);\r
1031 //      sem_init(&cb->sem, 0, 0);\r
1032 \r
1033         opterr = 0;\r
1034         while ((op=getopt(argc, argv, "a:Pp:C:S:t:scvVd")) != -1) {\r
1035                 switch (op) {\r
1036                 case 'a':\r
1037                         ret = get_addr(optarg, &cb->sin);\r
1038                         break;\r
1039                 case 'p':\r
1040                         cb->port = htons(atoi(optarg));\r
1041                         DEBUG_LOG("port %d\n", (int) atoi(optarg));\r
1042                         break;\r
1043                 case 's':\r
1044                         cb->server = 1;\r
1045                         DEBUG_LOG("server\n");\r
1046                         break;\r
1047                 case 'c':\r
1048                         cb->server = 0;\r
1049                         DEBUG_LOG("client\n");\r
1050                         break;\r
1051                 case 'S':\r
1052                         cb->size = atoi(optarg);\r
1053                         if (cb->size < RPING_MIN_BUFSIZE) {\r
1054                                 fprintf(stderr, "Invalid size (minimum is %d) " RPING_MIN_BUFSIZE);\r
1055                                 ret = EINVAL;\r
1056                         } else\r
1057                                 DEBUG_LOG("size %d\n", (int) atoi(optarg));\r
1058                         break;\r
1059                 case 'C':\r
1060                         cb->count = atoi(optarg);\r
1061                         if (cb->count < 0) {\r
1062                                 fprintf(stderr, "Invalid count %d\n", cb->count);\r
1063                                 ret = EINVAL;\r
1064                         } else\r
1065                                 DEBUG_LOG("count %d\n", (int) cb->count);\r
1066                         break;\r
1067                 case 'v':\r
1068                         cb->verbose++;\r
1069                         DEBUG_LOG("verbose\n");\r
1070                         break;\r
1071                 case 'V':\r
1072                         cb->validate++;\r
1073                         DEBUG_LOG("validate data\n");\r
1074                         break;\r
1075                 case 'd':\r
1076                         debug++;\r
1077                         break;\r
1078                 default:\r
1079                         usage();\r
1080                         ret = EINVAL;\r
1081                         goto out;\r
1082                 }\r
1083         }\r
1084         if (ret)\r
1085                 goto out;\r
1086 \r
1087         if (cb->server == -1) {\r
1088                 usage();\r
1089                 ret = EINVAL;\r
1090                 goto out;\r
1091         }\r
1092 \r
1093         cb->cm_channel = rdma_create_event_channel();\r
1094         if (!cb->cm_channel) {\r
1095                 ret = errno;\r
1096                 fprintf(stderr, "rdma_create_event_channel error %d\n", ret);\r
1097                 goto out;\r
1098         }\r
1099 \r
1100         ret = rdma_create_id(cb->cm_channel, &cb->cm_id, cb, RDMA_PS_TCP);\r
1101         if (ret) {\r
1102                 ret = errno;\r
1103                 fprintf(stderr, "rdma_create_id error %d\n", ret);\r
1104                 goto out2;\r
1105         }\r
1106         DEBUG_LOG("created cm_id %p\n", cb->cm_id);\r
1107 \r
1108 //      pthread_create(&cb->cmthread, NULL, cm_thread, cb);\r
1109 \r
1110         if (cb->server)\r
1111                 ret = rping_run_server(cb);\r
1112         else\r
1113                 ret = rping_run_client(cb);\r
1114 \r
1115         DEBUG_LOG("destroy cm_id %p\n", cb->cm_id);\r
1116         rdma_destroy_id(cb->cm_id);\r
1117 out2:\r
1118         rdma_destroy_event_channel(cb->cm_channel);\r
1119 out:\r
1120         free(cb);\r
1121         return ret;\r
1122 }\r